In [1]:
import pandas as pd

# Plotting
# import plotly.express as px
import plotly.graph_objects as go
import json


# Input Data

In [2]:
# Historical TfL fares table. Ending the cell on the bare name makes the
# notebook render the loaded frame.
tflFares = pd.read_csv(
    filepath_or_buffer="data/fares/TfLHistoricalFares2000to2025.csv",
)
tflFares

Unnamed: 0,year,singleZ1to4Cash,singleZ1to4OysterPeak,busCash,singleBusOyster,capBusTram,travelcardZ1to4,capZ1to4PAYG,travelcard7DayZ1to4,weeklyBusAndTramPass
0,2000,2.6,,1.00,,,,,26.8,11.5
1,2001,2.7,,1.00,,,,,27.6,9.5
2,2002,2.7,,1.00,,,6.8,,28.1,8.5
3,2003,2.8,,1.00,,,7.0,,28.4,8.5
4,2004,3.0,2.8,1.00,0.7,,7.3,,29.2,9.5
5,2005,2.8,2.5,1.20,1.0,3.0,8.0,,30.4,11.0
6,2006,3.0,2.5,1.50,1.0,3.0,8.4,,31.6,13.5
7,2007,4.0,2.5,2.00,1.0,3.0,9.0,,33.2,14.0
8,2008,4.0,2.5,2.00,0.9,3.0,9.4,,34.6,13.0
9,2009,4.0,2.8,2.00,1.0,3.3,10.0,,36.8,13.8


In [3]:
# Rail fares series; the first 7 lines of the CSV are skipped on load.
# The prices are broken down at three granularities within the same table:
# - yearly
# - quarterly
# - monthly
railFares = pd.read_csv(
    filepath_or_buffer="data/fares/railFares.csv",
    skiprows=7,
)
railFares

Unnamed: 0,Important notes,Unnamed: 1
0,1987,100.6
1,1988,107.6
2,1989,117.4
3,1990,127.7
4,1991,141.0
...,...,...
642,2024 SEP,515.0
643,2024 OCT,523.4
644,2024 NOV,519.5
645,2024 DEC,519.6


In [4]:
# Bus and coach fares series; the first 7 lines of the CSV are skipped on load.
# Same yearly / quarterly / monthly breakdown as railFares.
busFares = pd.read_csv(
    filepath_or_buffer="data/fares/busAndCoachFares.csv",
    skiprows=7,
)
busFares

Unnamed: 0,Important notes,Unnamed: 1
0,1987,103.4
1,1988,110.6
2,1989,119.3
3,1990,125.9
4,1991,143.6
...,...,...
642,2024 SEP,634.7
643,2024 OCT,630.1
644,2024 NOV,631.2
645,2024 DEC,647.1


In [5]:
# Need the following
# - Real wages

# Charts

In [6]:
# UTILS

# Default Plotly scatter mode: draw the connecting line and the point markers.
LINE_AND_MARKER = "lines+markers"

def create_trace(x, y, label: str, mode=LINE_AND_MARKER, line_shape=None) -> go.Scatter:
    """Build a Plotly scatter trace for one named series.

    Args:
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        label: Trace name, forwarded to Plotly as ``name``.
        mode: Plotly drawing mode; defaults to ``LINE_AND_MARKER``.
        line_shape: Optional Plotly line shape (the notebook uses ``"hv"``
            for step charts); ``None`` is forwarded unchanged.

    Returns:
        The configured ``go.Scatter`` trace.
    """
    scatter_kwargs = dict(x=x, y=y, mode=mode, name=label, line_shape=line_shape)
    return go.Scatter(**scatter_kwargs)

def create_layout(title: str, type=None):
    """Return the shared figure layout used by every chart in this notebook.

    Args:
        title: Figure title (may contain HTML such as ``<br>`` / ``<sup>``).
        type: Value stored under ``xaxis.type`` (e.g. ``"linear"``); ``None``
            is stored as-is.

    Returns:
        A plain dict suitable for ``fig.update_layout``: fixed 960x500 size,
        white plot background, legend shown, and an x-axis with no grid, a
        grey axis line and outside ticks.
    """
    x_axis = {
        "showgrid": False,
        "linecolor": "#7f7f7f",
        "linewidth": 2,
        "ticks": "outside",
        "type": type,
    }
    return {
        "title": title,
        "width": 960,
        "height": 500,
        "xaxis": x_axis,
        "showlegend": True,
        "plot_bgcolor": "white",
    }

def normalise_single_series_to_100(df):
    """Min-max rescale: the smallest value maps to 0 and the largest to 100.

    Args:
        df: A pandas object supporting ``min``/``max`` and arithmetic
            (the notebook passes a Series).

    Returns:
        ``(df - min) / (max - min) * 100``, with the same index as the input.
    """
    lowest = df.min()
    value_range = df.max() - lowest
    return (df - lowest) / value_range * 100

def normalise_series_to_first_value(df):
    """Express every value as a ratio of the first (positional) element.

    Args:
        df: A pandas object supporting ``iloc`` and division
            (the notebook passes a Series).

    Returns:
        ``df`` divided by ``df.iloc[0]``, so the first element becomes 1.0.
    """
    baseline = df.iloc[0]
    return df.div(baseline)

## Tube Fares

In [7]:
# Zone 1-4 peak single fares by year, as stored: contactless/Oyster vs paper (cash).
fig = go.Figure(
    data=[
        create_trace(
            x=tflFares['year'],
            y=tflFares['singleZ1to4OysterPeak'],
            label="Contactless ticket",
        ),
        create_trace(
            x=tflFares['year'],
            y=tflFares['singleZ1to4Cash'],
            label="Paper ticket",
        ),
    ]
)

layout = create_layout(title="TfL Tube Fares <br><sup>Zone 1-4 travel during peak hours</sup>")
fig.update_layout(layout)

fig.show()

# TODO: add annotations for the starting and final prices.
# Follow the code here: https://plotly.com/python/line-charts/ -> section "Label Lines with Annotations"

# Fares: 2000 - 2.6, 2012 - 5.3 for paper and 3.6 for contactless,
#        2025 - 7 for paper and 4.6 for contactless -> +38.46%, +76.92% using contactless
# Average Weekly Earnings (AWE) comparison: 2000 - £305, 2012 - £457, 2025 - £711 -> 49.83%, 133.11%
# https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/earningsandworkinghours/timeseries/kab9/emp
# Median weekly earnings for full-time employees: 2012 - £506, 2025 - £728
# https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/earningsandworkinghours/bulletins/annualsurveyofhoursandearnings/2024

In [8]:
# Percentage change of the peak Oyster single fare relative to the first row
# of the slice (row 10 onwards): ratio to the first value, times 100, minus 100.
normalise_series_to_first_value(tflFares["singleZ1to4OysterPeak"].iloc[10:]).mul(100).sub(100)

10     0.000000
11     9.677419
12    16.129032
13    22.580645
14    22.580645
15    25.806452
16    25.806452
17    25.806452
18    25.806452
19    25.806452
20    25.806452
21    29.032258
22    38.709677
23    41.935484
24    41.935484
25    48.387097
Name: singleZ1to4OysterPeak, dtype: float64

In [9]:
# Percentage change in the Zone 1-4 peak single fare relative to the first row
# of the slice (row 10 onwards), for both ticket types.
# Min-max scaling via normalise_single_series_to_100 was tried here as an
# alternative and is currently not used.
fig = go.Figure(
    data=[
        create_trace(
            x=tflFares['year'].iloc[10:],
            y=normalise_series_to_first_value(tflFares["singleZ1to4OysterPeak"].iloc[10:]).mul(100).sub(100),
            label="Contactless ticket",
        ),
        create_trace(
            x=tflFares['year'].iloc[10:],
            y=normalise_series_to_first_value(tflFares["singleZ1to4Cash"].iloc[10:]).mul(100).sub(100),
            label="Paper ticket",
        ),
    ]
)

layout = create_layout(title="Normalised TfL Tube Fares <br><sup>Zone 1-4 travel during peak hours</sup>")
fig.update_layout(layout)

fig.show()

In [10]:
# Get pay-as-you-go caps as well
# How many journeys are required to reach the cap? -> could put it as barchart underneath
# tflFares['capZ1to4PAYG']/tflFares['singleZ1to4OysterPeak']

In [11]:
# Zone 1-4 pay-as-you-go cap by year, with the x-axis limited to 2012-2025.
# A travelcard7DayZ1to4 trace was tried here and is currently not plotted.
fig = go.Figure(
    data=[
        create_trace(
            x=tflFares['year'],
            y=tflFares['capZ1to4PAYG'],
            label="PAYG Cap",
        ),
    ]
)

layout = create_layout(title="TfL Tube Fares <br><sup>Zone 1-4 travel during peak hours</sup>")
fig.update_layout(layout)

fig.update_xaxes(range=[2012, 2025])

fig.show()

## FTSE100

In [12]:
# Load the FTSE100 series; the records live under the top-level "data" key.
# Month-end values are reported from Jan 2000 to Mar 2025, plus a mid-April reading.
# NOTE(review): the saved output of this cell is a FileNotFoundError for this
# path — confirm where the JSON file actually lives before re-running.
# The context manager closes the file handle once parsing is done
# (a bare json.load(open(...)) leaves it open until garbage collection).
with open("data/ftse100from2000to2025.json", encoding="utf-8") as ftse_file:
    data = json.load(ftse_file)
ftse100 = pd.DataFrame(data['data'])
ftse100

FileNotFoundError: [Errno 2] No such file or directory: 'data/ftse100from2000to2025.json'

In [None]:
# Derive the calendar year of every observation so prices can be aggregated per year.
ftse100['_DATE_END'] = pd.to_datetime(ftse100['_DATE_END'])
ftse100['year'] = ftse100['_DATE_END'].dt.year

# Coerce the price columns to numbers BEFORE aggregating. Downstream cells cast
# them with .astype(float) only after max()/min(); if the JSON delivers them as
# strings, the aggregation would compare text lexicographically (e.g.
# "9999.9" > "10000.1") and silently pick the wrong yearly extreme.
ftse_price_columns = ['LOW_1', 'CLOSE_PRC', 'HIGH_1', 'OPEN_PRC']
ftse100[ftse_price_columns] = ftse100[ftse_price_columns].apply(pd.to_numeric)

# max()/min() aggregate every column independently, so one output row mixes
# values taken from different months of that year. Only HIGH_1 (from the max
# frame) and LOW_1 (from the min frame) are meaningful as yearly extremes.
# groupby sorts by the group key by default, so the result is already ordered by year.
ftse_yearly_max_prices = ftse100.groupby("year", as_index=False).max()
ftse_yearly_min_prices = ftse100.groupby("year", as_index=False).min()

ftse_yearly_min_prices.head()

Unnamed: 0,year,_DATE_END,LOW_1,CLOSE_PRC,HIGH_1,OPEN_PRC
0,2000,2000-01-31,5915.2,6142.19,6419.9,6142.2
1,2001,2001-01-31,4219.8,4903.39,5279.8,4903.4
2,2002,2002-01-31,3609.9,3721.75,4197.5,3721.8
3,2003,2003-01-31,3277.5,3567.41,3747.0,3567.4
4,2004,2004-01-31,4283.0,4385.67,4487.9,4385.7


In [None]:
# FTSE100 yearly extremes from row 10 of the yearly frames onwards:
# HIGH_1 from the per-year max frame, LOW_1 from the per-year min frame.
fig = go.Figure(
    data=[
        create_trace(
            x=ftse_yearly_max_prices['year'].iloc[10:],
            y=ftse_yearly_max_prices['HIGH_1'].astype(float).iloc[10:],
            label="Highest annual price",
        ),
        create_trace(
            x=ftse_yearly_min_prices['year'].iloc[10:],
            y=ftse_yearly_min_prices['LOW_1'].astype(float).iloc[10:],
            label="Lowest annual price",
        ),
    ]
)

layout = create_layout(title="FTSE100<br><sup>UK's stock market's index innit</sup>", type="linear")
fig.update_layout(layout)

fig.show()

In [None]:
# Min-max normalised (0-100) view of the lowest annual FTSE100 price,
# from row 10 of the yearly frame onwards.
fig = go.Figure()
# TRACES
# The trace is labelled "Lowest annual price", so it must read LOW_1 from the
# per-year *min* frame. LOW_1 in ftse_yearly_max_prices is the highest of the
# monthly lows, which is not the yearly low.
fig.add_trace(create_trace(
    x=ftse_yearly_min_prices['year'][10:],
    y=normalise_single_series_to_100(ftse_yearly_min_prices['LOW_1'].astype(float)[10:]),
    label="Lowest annual price",
))
# LAYOUT
layout = create_layout(title="Normalised FTSE100<br><sup>UK's stock market's index innit</sup>", type="linear")
fig.update_layout(layout)

# fig.update_xaxes(range=[2010,2025])

fig.show()

In [None]:
# Inflation could be factored in using CPIH.
# CPIH stands for the Consumer Prices Index including owner occupiers' housing
# costs, which is the UK's leading measure of inflation.
# Source: https://www.ons.gov.uk/economy/inflationandpriceindices/timeseries/l55o/mm23
cpih = pd.read_csv(
    filepath_or_buffer="./data/CPIH-annual-rate-1989-2024-by-year.csv",
    skiprows=7,
)
# Number of yearly rows covering 1989 through 2024 inclusive.
n_years = len(range(1989, 2024 + 1))
# Keep only those leading yearly rows and preview the last few.
cpih.head(n_years).tail()

Unnamed: 0,key,annual_rate
31,2020,1.0
32,2021,2.5
33,2022,7.9
34,2023,6.8
35,2024,3.3


## Combined Charts

In [None]:
# Contactless Tube fare vs FTSE100 annual highs, each expressed relative to its
# own first value in the slice (row 10 onwards).
# TODO - identify the multiplier dynamically.
# NOTE(review): the traces plot ratio * 200 - 200 while the subtitle states
# "normalised between 0-100" — confirm which scaling is intended.
# Paper-ticket and FTSE100-lows traces were tried here and are currently not plotted.
fares_normalised = normalise_series_to_first_value(tflFares["singleZ1to4OysterPeak"].iloc[10:])
stocks_normalised = normalise_series_to_first_value(ftse_yearly_max_prices['HIGH_1'].astype(float).iloc[10:])

fig = go.Figure(
    data=[
        create_trace(
            x=tflFares['year'].iloc[10:],
            y=fares_normalised.mul(200).sub(200),
            label="Contactless ticket",
        ),
        create_trace(
            x=ftse_yearly_max_prices['year'].iloc[10:],
            y=stocks_normalised.mul(200).sub(200),
            label="FTSE100 highs",
        ),
    ]
)

layout = create_layout(title="TfL Tube Fares vs the FTSE100<br><sup>Prices are normalised between 0-100</sup>")
fig.update_layout(layout)

fig.show()

In [None]:
# Largest growth ratio reached by either series; printed, then the fare ratios
# are displayed as the cell's output.
m1, m2 = fares_normalised.max(), stocks_normalised.max()
m = max((m1, m2))
print(m)

fares_normalised

1.4838709677419353


10    1.000000
11    1.096774
12    1.161290
13    1.225806
14    1.225806
15    1.258065
16    1.258065
17    1.258065
18    1.258065
19    1.258065
20    1.258065
21    1.290323
22    1.387097
23    1.419355
24    1.419355
25    1.483871
Name: singleZ1to4OysterPeak, dtype: float64

In [None]:
# Same fares-vs-FTSE100 comparison, drawn as a step chart (line_shape="hv").
# Note: this cell rebinds fares_normalised / stocks_normalised to the scaled
# values (ratio * 200 - 200), replacing the plain ratios.
fares_normalised = normalise_series_to_first_value(tflFares["singleZ1to4OysterPeak"].iloc[10:]).mul(200).sub(200)
stocks_normalised = normalise_series_to_first_value(ftse_yearly_max_prices['HIGH_1'].astype(float).iloc[10:]).mul(200).sub(200)

fig = go.Figure(
    data=[
        create_trace(
            x=tflFares['year'].iloc[10:],
            y=fares_normalised,
            label="Contactless ticket (PAYG)",
            line_shape="hv",
        ),
        create_trace(
            x=ftse_yearly_max_prices['year'].iloc[10:],
            y=stocks_normalised,
            label="FTSE100 annual highs",
            line_shape="hv",
        ),
    ]
)

layout = create_layout(title="TfL Tube Fares vs the FTSE100<br><sup>Prices are normalised between 0-100</sup>")
fig.update_layout(layout)

fig.show()

In [None]:
# Count the rows where the scaled fare growth is strictly above the scaled
# FTSE100 growth (the two Series are subtracted label-by-label).
# 11 out of 15 years the tube prices rose above the FTSE100.
#
# Reselling your tube tickets would reap in more revenue than investing it in
#  the UK economy through a stock ISA. Sadiq, Rachel - wanna chip in?
fares_normalised.sub(stocks_normalised).gt(0).sum()

np.int64(11)

In [None]:
# Create plotly/streamlit chart where one can select the following from a dropdown
# - Stock Index -> Get S&P 500
# - Transportation mode -> we have average rail and bus/coach fares too (from the ONS, see Sources below)

# Also need a dropdown for selecting years (2000-2025) and deal with missing values pls
# Though fares go back to 1987, and so do most indices -> did a search in Perplexity

# Sources
# - Bus & coach: https://www.ons.gov.uk/economy/inflationandpriceindices/timeseries/docx/mm23
# - Train: https://www.ons.gov.uk/economy/inflationandpriceindices/timeseries/docw/mm23
# - S&P 500: ?
# - DAX: ?
# - Nikkei 225: ?
# - Hang Seng: ?