In [1]:
# Development - Transport Demand Module (TDM)

import pandas as pd

In [11]:
# import data - return TDSK
def return_country_data(country='Kenya', start_year=1990, end_year=2021):
    try:
        df = pd.read_excel("https://zenodo.org/records/10406893/files/TSDK_ALL.xlsx?download=1", sheet_name="Data")
        # country_data = df[df['Country name'] == country][['Country name', 'Variable', 'Data code', 'Unit'] + [y for y in range(
        #     start_year, end_year)]]
        country_data = df[['Country name', 'Variable', 'Data code', 'Unit'] + [y for y in range(
             start_year, end_year)]]
        return country_data
    except Exception as e:
        return f"an error occurred (probably the source data at URL has changed): {e}"

In [3]:
# these functions return vectors of particular variables - gdp, population, etc. - the ones that are *useful* from TSDK.

# return gdp per year (total) for country, for which data exists.
def return_gdp(df):
    gdp = pd.Series(df[df['Data code'] == 'GDP_TOT'].iloc[0])
    return gdp.dropna()


# return total population and share urb/rur
def return_pop(df):
    pop = df[df['Data code'].isin(['POP_TOT', 'POP_URB', 'POP_RUR'])]
    return pop.dropna()


# return pkm_road (total) for country, for which data exists. Amalgamate sources.
def return_pkm_road(df):
    pkm_road = (df[df['Data code'] == 'ROAD_PA_MOV'].set_index(['Country name', 'Data code']).stack().groupby
                (level=[1, 2]).first().unstack())
    return pkm_road


# return pkm_road (CAR) for country, for which data exists. Amalgamate sources.
def return_pkm_car(df):
    pkm_car = (df[df['Data code'] == 'ROAD_PA_CAR'].set_index(['Country name', 'Data code']).stack().groupby
                (level=[1, 2]).first().unstack())
    return pkm_car


# return pkm_road (BUS) for country, for which data exists. Amalgamate sources.
def return_pkm_bus(df):
    pkm_bus = (df[df['Data code'] == 'ROAD_PA_BUS'].set_index(['Country name', 'Data code']).stack().groupby
                (level=[1, 2]).first().unstack())
    return pkm_bus


# return pkm_road (MOTO) for country, for which data exists. Amalgamate sources.
def return_pkm_moto(df):
    pkm_moto = (df[df['Data code'] == 'ROAD_PA_MOTORC'].set_index(['Country name', 'Data code']).stack().groupby
                (level=[1, 2]).first().unstack())
    return pkm_moto


# return pkm_rail (total) for country, for which data exists. Amalgamate sources.
def return_pkm_rail(df):
    pkm_road = (df[df['Data code'] == 'ROAD_PA_MOV'].set_index(['Country name', 'Data code']).stack().groupby
                (level=[1, 2]).first().unstack())
    return pkm_road

In [4]:
# logical test whether there is mode split data in the TSDK
def mode_share_test(df):    
    if all(v in df['Data code'].tolist() for v in ['ROAD_PA_MOTORC', 'ROAD_PA_CAR', 'ROAD_PA_BUS']):
        return True
    else:
        return False

In [5]:
# dictionary to hold values of mode shares by distance (pkm based) by country and mode
# TODO: this is base-year data.
mode_share_road_pkm = {'Kenya': {'BUS': 0.55, 'CAR': 0.2, 'MOTO': 0.1, 'WALK': 0.08, 'BIKE': 0.02}}

In [12]:
country = 'Kenya'

df = return_country_data(country=country)        

In [7]:
# Generate dictionary of pkm
road_pkm_by_mode = {country: {mode: None for mode in list(mode_share_road_pkm[next(iter(mode_share_road_pkm))].keys())} for country in list(mode_share_road_pkm.keys())}

# run mode share test - if true, use TSDK data. If false, use mode_share_km data
if mode_share_test(df):
    
    # write in data from dictionary
    for mode in mode_share_road_pkm[country]:
        road_pkm_by_mode[country][mode] = mode_share_road_pkm[country][mode] * return_pkm_road(df)[max([f for f in return_pkm_road(df).columns if f in range(1990,2050)])].item()  # return most recent value
    
    # use TSDK for car, moto and bus (this block will rarely be used in TSDK 2023 version)
    if 'ROAD_PA_MOTORC' in df['Data code'].tolist():
        road_pkm_by_mode[country]['MOTO'] = return_pkm_moto(df)[max([f for f in return_pkm_moto(df).columns if f in range(1990,2050)])].item() * return_pkm_road(df)[max([f for f in return_pkm_road(df).columns if f in range(1990,2050)])].item()
    if 'ROAD_PA_BUS' in df['Data code'].tolist():
        road_pkm_by_mode[country]['BUS'] = return_pkm_bus(df)[max([f for f in return_pkm_bus(df).columns if f in range(1990,2050)])].item() * return_pkm_road(df)[max([f for f in return_pkm_road(df).columns if f in range(1990,2050)])].item()
    if 'ROAD_PA_CAR' in df['Data code'].tolist():
        road_pkm_by_mode[country]['CAR'] = return_pkm_car(df)[max([f for f in return_pkm_car(df).columns if f in range(1990,2050)])].item() * return_pkm_road(df)[max([f for f in return_pkm_road(df).columns if f in range(1990,2050)])].item()
else:
    for mode in mode_share_road_pkm[country]:
        road_pkm_by_mode[country][mode] = mode_share_road_pkm[country][mode] * return_pkm_road(df)[max([f for f in return_pkm_road(df).columns if f in range(1990,2050)])].item()  # return most recent value
        


In [8]:
road_pkm_by_mode

{'Kenya': {'BUS': 28866.790887000003,
  'CAR': 10497.014868,
  'MOTO': 5248.507434,
  'WALK': 4198.8059472,
  'BIKE': 1049.7014868}}

In [9]:
# this is from Gemini.

def travel_demand_timeseries(base_pkm_mode, gdp_timeseries, population_timeseries, elasticity_gdp_mode, elasticity_pop_mode):
  """
  This function calculates a timeseries of passenger-km by mode (travel demand)

  Args:
      base_pkm_mode (dict): Dictionary containing base year passenger-km for each mode (e.g., car, bus, train).
      gdp_timeseries (pd.Series): Timeseries of GDP values.
      population_timeseries (pd.Series): Timeseries of population values.
      elasticity_gdp_mode (pd.DataFrame): DataFrame containing elasticity of travel demand by mode with respect to GDP for each year.
      elasticity_pop_mode (pd.DataFrame): DataFrame containing elasticity of travel demand by mode with respect to population for each year.

  Returns:
      pd.DataFrame: Timeseries dataframe with passenger-km by mode for each year.
  """

  # Check if dictionaries and DataFrames have matching indices (years)
  if not (base_pkm_mode.keys() == gdp_timeseries.index).all():
    raise ValueError("Base year passenger-km and GDP timeseries must have matching years (indices).")
  if not (base_pkm_mode.keys() == population_timeseries.index).all():
    raise ValueError("Base year passenger-km and population timeseries must have matching years (indices).")
  if not (elasticity_gdp_mode.index == gdp_timeseries.index).all():
    raise ValueError("GDP timeseries and elasticity_gdp_mode must have matching years (indices).")
  if not (elasticity_pop_mode.index == population_timeseries.index).all():
    raise ValueError("Population timeseries and elasticity_pop_mode must have matching years (indices).")

  # Initialize empty dataframe to store results
  timeseries_pkm_mode = pd.DataFrame(columns=base_pkm_mode.keys())

  # Calculate passenger-km for each year
  for year in base_pkm_mode.keys():
    pkm_by_mode = {}
    for mode, base_pkm in base_pkm_mode.items():
      gdp_elasticity = elasticity_gdp_mode.loc[year, mode]
      pop_elasticity = elasticity_pop_mode.loc[year, mode]
      pkm_by_mode[mode] = base_pkm * (gdp_timeseries.loc[year] ** gdp_elasticity) * (population_timeseries.loc[year] ** pop_elasticity)
    timeseries_pkm_mode.loc[year] = pkm_by_mode

  return timeseries_pkm_mode


In [13]:
df

Unnamed: 0,Country name,Variable,Data code,Unit,1990,1991,1992,1993,1994,1995,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Angola,Population,POP_TOT,million people,11.848385,12.248901,12.657361,13.075044,13.503753,13.945205,...,24.220660,25.107925,26.015786,26.941773,27.884380,28.842482,29.816769,30.809787,31.825299,32.866268
1,Angola,Population,POP_URB,%,37.144000,38.580000,40.039000,41.511000,43.000000,44.169000,...,60.528000,61.268000,62.002000,62.731000,63.446000,64.149000,64.839000,65.514000,66.177000,66.825000
2,Angola,Population,POP_RUR,%,62.856000,61.420000,59.961000,58.489000,57.000000,55.831000,...,39.472000,38.732000,37.998000,37.269000,36.554000,35.851000,35.161000,34.486000,33.823000,33.175000
3,Angola,Population growth,POP_GRO,%,3.378411,3.324465,3.280272,3.246643,3.226227,3.216807,...,3.634150,3.597755,3.551997,3.497447,3.438869,3.378269,3.322158,3.276145,3.242914,3.218530
4,Angola,GDP,GDP_TOT,million USD (2015),26557.061824,26820.337727,25254.491117,19197.601089,19454.726739,22372.935756,...,72365.710633,78545.742323,82433.756566,86407.063631,87219.290029,84969.032349,84841.578801,83724.798831,83138.725241,78566.095350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2477,Zimbabwe,Energy intensity,NRG_EIP_MINIBUPETURB,MJ/passenger-km,,,,,,,...,,,0.660000,,,,,,,
2478,Zimbabwe,Load factor,NRG_LF_CARELEURB,passenger/vehicle,,,,,,,...,,,1.400000,,,,,,,
2479,Zimbabwe,Load factor,NRG_LF_CARHYBURB,passenger/vehicle,,,,,,,...,,,1.400000,,,,,,,
2480,Zimbabwe,Load factor,NRG_LF_CARPETURB,passenger/vehicle,,,,,,,...,,,1.400000,,,,,,,
