In [None]:
import pandas as pd
import numpy as np

In [None]:
# Stations listed for each Auckland rail line, in the order given here.
# Branch stations are kept inline, so a junction such as "Penrose" can appear
# more than once within a single line's list.
auckland_railway_stations = {
    "Western Line": [
        "Britomart",
        "Maungawhau / Mount Eden",
        "Kingsland",
        "Morningside",
        "Baldwin Avenue",
        "Mount Albert",
        "Avondale",
        "New Lynn",
        "Fruitvale Road",
        "Glen Eden",
        "Sunnyvale",
        "Henderson",
        "Sturges Road",
        "Ranui",
        "Swanson",
    ],
    "Southern Line": [
        "Britomart",
        "Maungawhau / Mount Eden",
        "Newmarket",
        "Remuera",
        "Greenlane",
        "Ellerslie",
        "Penrose",
        # Part of Onehunga Branch
        "Te Papapa",
        "Onehunga",
        "Penrose",
        "Otahuhu",
        "Middlemore",
        "Papatoetoe",
        "Puhinui",
        # Branch to Manukau
        "Manukau",
        "Homai",
        "Manurewa",
        "Te Mahia",
        "Takanini",
        "Papakura",
        "Drury (under construction)",
        "Paerata (under construction)",
        "Pukekohe",
    ],
    "Eastern Line": [
        "Britomart",
        "Parnell",
        "Orakei",
        "Meadowbank",
        "Glen Innes",
        "Panmure",
        "Sylvia Park",
        "Otahuhu",
        "Middlemore",
        "Papatoetoe",
        "Puhinui",
        "Manukau",
    ],
    "Onehunga Line": [
        "Britomart",
        "Maungawhau / Mount Eden",
        "Newmarket",
        "Remuera",
        "Greenlane",
        "Ellerslie",
        "Penrose",
        "Te Papapa",
        "Onehunga",
    ],
}


In [None]:
# Show the container type of the station mapping.
type(auckland_railway_stations)

In [None]:
# Number of stations listed for each line, keyed by the lower-cased line name.
# Key and value are derived from the same dict entry so they cannot drift
# apart; the hand-written version stored the Western Line count under
# "southern line" and the Southern Line count under "western line".
auckland_railway_stations_count = {
    line.lower(): len(stations)
    for line, stations in auckland_railway_stations.items()
}

In [None]:
# Display the per-line station counts.
auckland_railway_stations_count

In [None]:
# A notebook cell only echoes its final expression, so the keys view is
# printed explicitly; otherwise its result is silently discarded.
print(auckland_railway_stations.keys())
auckland_railway_stations.items()

In [None]:
# Flatten the {line: [stations]} mapping into one record per (line, station)
# pair, then build the long-format DataFrame from those records in one go.
rows = [
    {"Line": line, "Station": station}
    for line, stations in auckland_railway_stations.items()
    for station in stations
]

df = pd.DataFrame(rows)

In [None]:
# Display the long-format frame (one row per Line/Station pair).
df

In [None]:
# Assign a display colour to every row from its line name; any line missing
# from the mapping falls back to 'grey'.
# The lookup reads the capitalised "Line" column that df was built with
# (row['line'] raised KeyError), and a vectorised Series.map replaces the
# row-wise apply. The result is stored as "Colour" to match the capitalised
# column names and the later groupby("Colour").
line_colours = {
    'Western Line': 'green',
    'Eastern Line': 'yellow',
    'Southern Line': 'red',
    'Onehunga Line': 'blue',
}
df['Colour'] = df['Line'].map(line_colours).fillna('grey')

Alternative, more intuitive approach: assign the colours with boolean masks and `.loc`.

In [None]:
# Start from an empty colour column, then fill it in one line at a time using
# a boolean mask on "Line". The column is initialised under the same name
# ("Colour") that the .loc assignments write to; initialising 'colour' instead
# left a second, permanently empty column and NaN for any unmatched row.
# NOTE(review): these colours differ from the apply-based cell above
# (Eastern yellow, Southern red, Onehunga blue) — confirm which is intended.
df["Colour"] = ""

df.loc[df["Line"] == "Western Line", "Colour"] = "green"
df.loc[df["Line"] == "Eastern Line", "Colour"] = "blue"
df.loc[df["Line"] == "Southern Line", "Colour"] = "yellow"
df.loc[df["Line"] == "Onehunga Line", "Colour"] = "red"

In [None]:
# Inspect df again after the colour assignment.
df

In [None]:
# Count the rows for each colour and return them as a two-column table
# ("Colour", "count").
(
    df
    .groupby("Colour")
    .size()
    .reset_index(name="count")
)

## Alternative way: start from the per-line station counts

In [None]:
# Recall the per-line station counts before turning them into a DataFrame.
auckland_railway_stations_count

In [None]:
# Option 1: build the counts table directly from the (line, count) pairs.
# The items view is materialised as a list of tuples so the constructor gets
# a plain sequence of rows. The frame is left as the cell's last expression
# so the notebook renders it as a table instead of print()'s plain text.
df2 = pd.DataFrame(
    list(auckland_railway_stations_count.items()),
    columns=['Line', 'station_count'],
)
df2

In [None]:
# Option 2: load the counts with the line names as the index, then move the
# index into a regular column called "Line".
df2 = (
    pd.DataFrame.from_dict(
        auckland_railway_stations_count,
        orient='index',
        columns=['station_count'],
    )
    .reset_index()
    .rename(columns={'index': 'Line'})
)

In [None]:
# Display the counts table (columns: Line, station_count).
df2

In [None]:
# Add an empty "colour" column, then fill it for each known line by matching
# on the lower-cased line name stored in "Line".
df2['colour'] = ""

for line_name, line_colour in {
    "western line": "green",
    "eastern line": "blue",
    "southern line": "yellow",
    "onehunga line": "red",
}.items():
    df2.loc[df2["Line"] == line_name, "colour"] = line_colour

In [None]:
# Inspect df2 again after the colour column has been filled in.
df2

In [None]:
# Disabled cleanup step, kept for reference: it would drop a capitalised
# 'Colour' column from df2 (presumably one created by mistake — TODO confirm
# whether this cell is still needed or can be deleted).
#df2.drop(columns=['Colour'], inplace=True)

# If you have already created a wide-format table

In [None]:
# Length of the longest station list; every column is padded to this length
max_len = max(map(len, auckland_railway_stations.values()))

# Right-pad each line's list with None so all columns have max_len entries
padded_data = {
    line: stations + (max_len - len(stations)) * [None]
    for line, stations in auckland_railway_stations.items()
}

# One column per line, one row per position along the line
df_wide = pd.DataFrame(padded_data)

print(df_wide.head())

In [None]:
# Display the full wide-format table (one column per line).
df_wide

In [None]:
# Reshape the wide table (df_wide, built in the previous step) back to long
# format: the row index becomes "position", the column headers become "line",
# and the cell values become "station". Rows whose station is missing (the
# None padding) are dropped at the end.
df_long = (
    df_wide
    .reset_index()
    .rename(columns={'index': 'position'})
    .melt(id_vars='position', var_name='line', value_name='station')
    .dropna(subset=['station'])
)


In [None]:
# Display the long-format result (columns: position, line, station).
df_long