In [None]:
import pandas as pd
from pathlib import Path
import os
import plotly.graph_objects as go
import json


# Input Data

In [None]:
# Data folders are resolved relative to the current working directory:
# two levels up from it, then into "data".
DATA_DIR = Path.cwd().parents[1].joinpath("data")
FARE_PRICES_DIR = DATA_DIR.joinpath("fares")
STOCK_PRICES_DIR = DATA_DIR.joinpath("stocks")
STOCK_PRICES_DIR

In [None]:
# Historical TfL fares, read from the "raw" sub-folder of the fares directory.
tflFares = pd.read_csv(
    FARE_PRICES_DIR.joinpath("raw", "TfLHistoricalFares2000to2025.csv")
)
tflFares

# Charts

In [None]:
# UTILS

# Default value for the `mode` argument of create_trace (passed through to go.Scatter).
LINE_AND_MARKER = "lines+markers"

def create_trace(x, y, label: str, mode=LINE_AND_MARKER, line_shape=None) -> go.Scatter:
    """Build a Plotly scatter trace.

    Args:
        x: Values for the x axis.
        y: Values for the y axis.
        label: Trace name (passed to ``go.Scatter`` as ``name``).
        mode: Scatter drawing mode; defaults to ``LINE_AND_MARKER``.
        line_shape: Forwarded unchanged to ``go.Scatter``.

    Returns:
        The configured ``go.Scatter`` trace.
    """
    scatter_kwargs = {
        "x": x,
        "y": y,
        "mode": mode,
        "name": label,
        "line_shape": line_shape,
    }
    return go.Scatter(**scatter_kwargs)

def create_layout(title: str, type=None):
    """Return the shared figure layout as a plain dict.

    Args:
        title: Figure title.
        type: Value stored under the x axis ``type`` key (``None`` by default).

    Returns:
        A dict with fixed size (960x500), a white plot background, a visible
        legend and a grid-less x axis with outside ticks.
    """
    x_axis_style = {
        "showgrid": False,
        "linecolor": "#7f7f7f",
        "linewidth": 2,
        "ticks": 'outside',
        "type": type,
    }
    return {
        "title": title,
        "width": 960,
        "height": 500,
        "xaxis": x_axis_style,
        "showlegend": True,
        "plot_bgcolor": 'white',
    }

def normalise_single_series_to_100(df):
    """Min-max scale the input so its minimum maps to 0 and its maximum to 100.

    For a DataFrame the minimum and maximum are taken per column.
    """
    lowest = df.min()
    span = df.max() - lowest
    return (df - lowest) / span * 100

def normalise_series_to_first_value(df):
    """Divide every value by the first (positional) value, so the first value becomes 1."""
    first_value = df.iloc[0]
    return df / first_value

## Tube Fares

In [None]:
# Absolute fares per year, one trace per ticket type.
fig = go.Figure()
fig.add_traces([
    create_trace(x=tflFares['year'], y=tflFares[column], label=label)
    for column, label in (
        ("singleZ1to4OysterPeak", "Contactless ticket"),
        ("singleZ1to4Cash", "Paper ticket"),
    )
])

layout = create_layout(title="TfL Tube Fares <br><sup>Zone 1-4 travel during peak hours</sup>")
fig.update_layout(layout)

fig.show()

# 2000 - 2.6, 2012 - 5.3 for paper and 3.6 for contactless, 2025 - 7 for paper and 4.6 for contactless -> +38.46%, +76.92% using contactless
# Average Weekly Earnings (AWE) comparison: 2000 - £305, 2012 - £457, 2025 - £711 -> 49.83%, 133.11%
# https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/earningsandworkinghours/timeseries/kab9/emp
# Median weekly earnings for full-time employees: 2012 - £506, 2025 - £728
# https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/earningsandworkinghours/bulletins/annualsurveyofhoursandearnings/2024

In [None]:
# tflFares["singleZ1to4OysterPeak"][10:] / tflFares["singleZ1to4OysterPeak"][10:].iloc[0]
# Percentage change of the fare relative to the first row kept by the [10:] slice
# (ratio to the first value, times 100, minus 100).
# NOTE(review): presumably row 10 is the intended base year - confirm against the CSV.
normalise_series_to_first_value(tflFares["singleZ1to4OysterPeak"][10:]) * 100 - 100

In [None]:
# Percentage change of each fare relative to the first row kept by the [10:] slice.
fig = go.Figure()
fig.add_traces([
    create_trace(
        x=tflFares['year'][10:],
        y=normalise_series_to_first_value(tflFares[column][10:]) * 100 - 100,
        label=label,
    )
    for column, label in (
        ("singleZ1to4OysterPeak", "Contactless ticket"),
        ("singleZ1to4Cash", "Paper ticket"),
    )
])
# Alternative kept for reference: min-max scale each series to 0-100 instead.
# fig.add_trace(create_trace(x=tflFares['year'][10:], y=normalise_single_series_to_100(tflFares["singleZ1to4OysterPeak"][10:]), label="Contactless ticket"))
# fig.add_trace(create_trace(x=tflFares['year'][10:], y=normalise_single_series_to_100(tflFares["singleZ1to4Cash"][10:]), label="Paper ticket"))

layout = create_layout(title="Normalised TfL Tube Fares <br><sup>Zone 1-4 travel during peak hours</sup>")
fig.update_layout(layout)

fig.show()

In [None]:
# Get pay-as-you-go caps as well
# How many journeys are required to reach the cap? -> could put it as barchart underneath
# tflFares['capZ1to4PAYG']/tflFares['singleZ1to4OysterPeak']

In [None]:
# Pay-as-you-go cap per year, with the x axis limited to 2012-2025.
fig = go.Figure()
fig.add_trace(
    create_trace(
        x=tflFares['year'],
        y=tflFares['capZ1to4PAYG'],
        label="PAYG Cap",
    )
)
# Optional second trace, currently disabled:
# fig.add_trace(create_trace(x=tflFares['year'], y=tflFares['travelcard7DayZ1to4'], label="travelcard7DayZ1to4"))

layout = create_layout(title="TfL Tube Fares <br><sup>Zone 1-4 travel during peak hours</sup>")
fig.update_layout(layout)
fig.update_xaxes(range=[2012, 2025])

fig.show()

## FTSE100

In [None]:
# Read the FTSE100 export inside a context manager so the file handle is closed
# as soon as parsing finishes instead of being left open.
with open(STOCK_PRICES_DIR / "ftse100from2000to2025.json") as ftse100_file:
    data = json.load(ftse100_file)
# The rows are stored under the top-level "data" key of the JSON document.
ftse100 = pd.DataFrame(data['data'])
ftse100
# Month-end values are reported from January 2000 to March 2025, plus a mid-April 2025 value

In [None]:
# Derive the calendar year of every observation so prices can be grouped per year.
ftse100['_DATE_END'] = pd.to_datetime(ftse100['_DATE_END'])
ftse100['year'] = ftse100['_DATE_END'].dt.year

# Later cells call .astype(float) on HIGH_1 / LOW_1, which suggests they may be stored
# as text. Cast them before aggregating so max()/min() compare numbers, not strings
# (string comparison is lexicographic and can pick the wrong row).
ftse100[['HIGH_1', 'LOW_1']] = ftse100[['HIGH_1', 'LOW_1']].astype(float)

# Column-wise yearly maximum / minimum over every column, ordered by year.
ftse_yearly_max_prices = ftse100.groupby("year", as_index=False).max()
ftse_yearly_max_prices = ftse_yearly_max_prices.sort_values("year")

ftse_yearly_min_prices = ftse100.groupby("year", as_index=False).min()
ftse_yearly_min_prices = ftse_yearly_min_prices.sort_values("year")

ftse_yearly_min_prices.head()

In [None]:
# Yearly FTSE100 highs and lows, skipping the first ten rows of each yearly frame.
fig = go.Figure()
fig.add_traces([
    create_trace(x=yearly['year'][10:], y=yearly[column].astype(float)[10:], label=label)
    for yearly, column, label in (
        (ftse_yearly_max_prices, 'HIGH_1', "Highest annual price"),
        (ftse_yearly_min_prices, 'LOW_1', "Lowest annual price"),
    )
])

layout = create_layout(title="FTSE100<br><sup>UK's stock market's index innit</sup>", type="linear")
fig.update_layout(layout)

fig.show()

In [None]:
# Yearly FTSE100 lows, min-max scaled to 0-100 over the plotted rows.
fig = go.Figure()
# TRACES
# Read LOW_1 from the yearly-minimum frame so the data matches the "Lowest annual price"
# label; the yearly-maximum frame would give the largest LOW_1 value of each year instead.
fig.add_trace(create_trace(
    x=ftse_yearly_min_prices['year'][10:],
    y=normalise_single_series_to_100(ftse_yearly_min_prices['LOW_1'].astype(float)[10:]),
    label="Lowest annual price",
))
# LAYOUT
layout = create_layout(title="Normalised FTSE100<br><sup>UK's stock market's index innit</sup>", type="linear")
fig.update_layout(layout)

# fig.update_xaxes(range=[2010,2025])

fig.show()

In [None]:
# Could factor in inflation using CPIH
# CPIH stands for the Consumer Prices Index including owner occupiers' housing costs, which is the UK's leading measure of inflation
# Source: https://www.ons.gov.uk/economy/inflationandpriceindices/timeseries/l55o/mm23
# Inclusive number of years between 1989 and 2024.
n_years = 2024 - 1989 + 1
# The first 7 lines of the file are skipped before parsing.
cpih = pd.read_csv(DATA_DIR.joinpath("CPIH-annual-rate-1989-2024-by-year.csv"), skiprows=7)
# Show the last few of the first n_years rows.
cpih.head(n_years).tail()

## Combined Charts

In [None]:
fig = go.Figure()

# TODO - identify multiplier dynamically
# Ratios relative to the first row kept by the [10:] slice.
fares_normalised = tflFares["singleZ1to4OysterPeak"][10:].pipe(normalise_series_to_first_value)
stocks_normalised = ftse_yearly_max_prices['HIGH_1'].astype(float)[10:].pipe(normalise_series_to_first_value)

# Both ratios go through the same "* 200 - 200" transform before plotting.
fig.add_traces([
    create_trace(x=years, y=ratio * 200 - 200, label=label)
    for years, ratio, label in (
        (ftse_yearly_max_prices['year'][10:], stocks_normalised, "FTSE100 highs"),
        (tflFares['year'][10:], fares_normalised, "Contactless ticket"),
    )
])
# Optional traces, currently disabled:
# fig.add_trace(create_trace(x=tflFares['year'][10:], y=normalise_series_to_first_value(tflFares["singleZ1to4Cash"][10:]) * 200 - 200, label="Paper ticket"))
# fig.add_trace(create_trace(x=ftse_yearly_min_prices['year'][10:], y=normalise_series_to_first_value(ftse_yearly_max_prices['LOW_1'].astype(float)[10:]) * 200 - 200, label="FTSE100 lows"))

layout = create_layout(title="TfL Tube Fares vs the FTSE100<br><sup>Prices are normalised between 0-100</sup>")
fig.update_layout(layout)
fig.update_yaxes(title_text="Relative price")
fig.update_xaxes(title_text="Year")

fig.show()


# TODO: add annotations for starting price
# Follow code here: https://plotly.com/python/line-charts/ -> section Label Lines with Annotations

In [None]:
# Largest ratio reached by either series (candidate for the plotting multiplier).
m1, m2 = (series.max() for series in (fares_normalised, stocks_normalised))
m = max(m1, m2)
print(m)

stocks_normalised

In [None]:
# concat(axis=1) puts the two series side by side as columns; the helper's min()/max()
# are then taken per column, so each column is scaled to 0-100 independently
# (this does not give the two series a common starting point).
normalise_single_series_to_100(pd.concat([tflFares["singleZ1to4OysterPeak"][10:], ftse_yearly_max_prices['HIGH_1'].astype(float)[10:]], axis=1))
# I need the same starting point and for the max to be 100

In [None]:
# Step chart: fares drawn with "hv" steps, FTSE100 highs as an ordinary line.
fig = go.Figure()

# Ratio to the first kept row, then the same "* 200 - 200" transform for both series.
fares_normalised = tflFares["singleZ1to4OysterPeak"][10:].pipe(normalise_series_to_first_value) * 200 - 200
stocks_normalised = ftse_yearly_max_prices['HIGH_1'].astype(float)[10:].pipe(normalise_series_to_first_value) * 200 - 200

fig.add_trace(create_trace(x=tflFares['year'][10:], y=fares_normalised, label="Contactless ticket (PAYG)", line_shape="hv"))
# The stock trace keeps create_trace's default line shape (line_shape="hv" is left disabled).
fig.add_trace(create_trace(x=ftse_yearly_max_prices['year'][10:], y=stocks_normalised, label="FTSE100 annual highs"))

layout = create_layout(title="TfL Tube Fares vs the FTSE100<br><sup>Prices are normalised to the same starting point of 1</sup>")
fig.update_layout(layout)

fig.show()

In [None]:
# Count the rows where the fare series is above the stock series (aligned on index).
fares_normalised.sub(stocks_normalised).gt(0).sum()
# 11 out of 15 years the tube prices rose above the FTSE100

# Reselling your tube tickets would reap in more revenue than investing it in the UK economy
#  through a stock ISA. Sadiq, Rachel - wanna chip in?

In [None]:
# Load the combined transport-fare and stock-index CSVs.
# NOTE(review): dynamic_plot looks frames up by key in dicts and reads their 'year'
# column; how these two frames map onto those dicts is not shown here - confirm.
combined_fares = pd.read_csv(FARE_PRICES_DIR / "processed" / "combined_transport_fares.csv")
combined_indices = pd.read_csv(STOCK_PRICES_DIR / "combined_indices.csv")
# Time to plot dynamically. Variables
# - Transport modes selected
# - Indices selected
# - Year range
# How to distinguish between transport and stocks? Make transport costs a step function

In [None]:
import plotly.graph_objects as go

def normalise_series_to_first_value(series):
    """Return the series divided by its first (positional) element, so it starts at 1."""
    baseline = series.iloc[0]
    return series / baseline

def scale_to_max_100(normalised_series):
    """Rescale the series so that its maximum value becomes 100.

    A series whose maximum is exactly 0 is multiplied by 1, i.e. returned with
    unchanged values.
    """
    peak = normalised_series.max()
    if peak == 0:
        return normalised_series * 1
    return normalised_series * (100 / peak)

def create_trace(x, y, label, line_shape='linear'):
    """Build a 'lines+markers' Plotly scatter trace named ``label``.

    ``line_shape`` is forwarded unchanged to ``go.Scatter``.
    """
    scatter_kwargs = {
        "x": x,
        "y": y,
        "mode": 'lines+markers',
        "name": label,
        "line_shape": line_shape,
    }
    return go.Scatter(**scatter_kwargs)

def create_layout(title, type=None):
    """Return the figure layout used by the dynamic chart as a plain dict.

    This definition replaces the notebook's earlier ``create_layout`` helper once
    the cell has run. The optional ``type`` argument is accepted so that earlier
    cells calling ``create_layout(..., type="linear")`` still work when re-run
    afterwards instead of raising ``TypeError``.

    Args:
        title: Figure title.
        type: Optional x axis type. When ``None`` (the default) no ``type`` key
            is added, so the result is identical to calling with a title only.

    Returns:
        A dict with fixed size (900x500), a white plot background, a visible
        legend and a grid-less x axis with outside ticks.
    """
    xaxis = dict(
        showgrid=False,
        linecolor="#7f7f7f",
        linewidth=2,
        ticks='outside'
    )
    if type is not None:
        xaxis['type'] = type
    return dict(
        title=title,
        width=900,
        height=500,
        xaxis=xaxis,
        showlegend=True,
        plot_bgcolor='white'
    )

def dynamic_plot(selected_transport_modes, selected_stock_indices, year_range,
                 transport_data_dict, stock_data_dict):
    """Plot the selected stock indices on one shared relative scale and show the figure.

    Each selected index is restricted to ``year_range`` (inclusive), its ``high``
    column is divided by its first value in that range, and all indices are then
    multiplied by a single common factor so that the largest value across every
    selected index maps to 100. Indices that grew less therefore peak below 100.

    Args:
        selected_transport_modes: Currently unused; transport traces are not
            implemented yet (see the commented block below).
        selected_stock_indices: Iterable of keys into ``stock_data_dict``.
        year_range: Sequence whose items 0 and 1 are the first and last year to keep.
        transport_data_dict: Currently unused.
        stock_data_dict: Mapping from key to a DataFrame with ``year`` and ``high`` columns.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    fig = go.Figure()

    # Pass 1: growth of every selected index relative to its first year in range.
    growth_by_stock = {}
    for stock_key in selected_stock_indices:
        df = stock_data_dict[stock_key]
        df_filtered = df[(df['year'] >= year_range[0]) & (df['year'] <= year_range[1])]
        growth = normalise_series_to_first_value(df_filtered['high'].astype(float))
        growth_by_stock[stock_key] = (df_filtered['year'], growth)

    # One shared factor for all indices. Scaling each index by its own peak would
    # make every line top out at the same height and hide their relative growth.
    max_stock_growth = max(
        (growth.max() for _, growth in growth_by_stock.values()),
        default=0,
    )
    shared_scale = 100 / max_stock_growth if max_stock_growth != 0 else 1

    # Pass 2: plot every index with the shared factor applied.
    for stock_key, (years, growth) in growth_by_stock.items():
        scaled_y = growth * shared_scale
        fig.add_trace(create_trace(
            x=years,
            # NOTE(review): the "* 2 - 200" offset is carried over unchanged; with values
            # capped at 100 it places the plotted maximum at 0 - confirm it is intended.
            y=scaled_y * 2 - 200,
            label=f"{stock_key} annual highs",
            line_shape='linear'
        ))

    # TODO: transport modes are not plotted yet. Draft kept for reference:
    # for mode in selected_transport_modes:
    #     df = transport_data_dict[mode]
    #     df_filtered = df[(df['year'] >= year_range[0]) & (df['year'] <= year_range[1])]
    #     norm_series = normalise_series_to_first_value(df_filtered['price'])  # Replace 'price' with your actual column name
    #     # No scaling here, just shifting for plot clarity as example
    #     plotted_y = norm_series * 2 - 200
    #     fig.add_trace(create_trace(
    #         x=df_filtered['year'],
    #         y=plotted_y,
    #         label=f"{mode} fare",
    #         line_shape='hv'  # Step function for transport modes
    #     ))

    layout = create_layout("Transport Fares vs Stock Indices<br><sup>Prices normalized and scaled for comparison</sup>")
    fig.update_layout(layout)
    fig.show()

# dynamic_plot has five required parameters; passing only the first raises TypeError.
# Supply every argument so the cell runs. With empty selections this shows an empty chart.
# TODO: build the per-mode / per-index frame dicts and pass real selections.
dynamic_plot(
    selected_transport_modes=[],
    selected_stock_indices=[],
    year_range=(2000, 2025),
    transport_data_dict={},
    stock_data_dict={},
)

# Ideas, thoughts

In [None]:
# Create plotly/streamlit chart where one can select the following from a dropdown
# - Stock Index -> Get S&P 500
# - Transportation mode -> we also have average rail and bus/coach fares from the ONS (see Sources below)

# Also need a dropdown for selecting years (2000-2025) and deal with missing values pls
# Though fares go back to 1987, and so do most indices -> Did a search in perplexity

# Sources
# - Bus & coach: https://www.ons.gov.uk/economy/inflationandpriceindices/timeseries/docx/mm23
# - Train: https://www.ons.gov.uk/economy/inflationandpriceindices/timeseries/docw/mm23
# - FTSE100: downloaded from WSJ Markets
# - S&P 500: downloaded from WSJ Markets
# - DAX: downloaded from WSJ Markets
# - Nikkei 225: downloaded from WSJ Markets
# - Hang Seng: downloaded from WSJ Markets