In [1]:
import warnings

import pandas as pd
import altair as alt
import numpy as np
import eco_style
# Activate the Altair theme named 'light' for every chart rendered below.
# NOTE(review): the theme is presumably registered as a side effect of `import eco_style` — confirm.
alt.themes.enable('light')

ThemeRegistry.enable('light')

In [86]:
# Load the "Time Series" sheet of the public sector current receipts workbook.
# - skiprows=4: the header is read from the fifth spreadsheet row.
# - iloc[1:]: the first row under the header is dropped
#   (presumably a units/notes row rather than data — TODO confirm against the workbook).
# - Header clean-up: line breaks are removed outright (not turned into spaces) and
#   double spaces are collapsed. The keys in `category_mapping` must match the result
#   exactly, which is presumably why some of them contain fused words such as
#   "Customsduties" and "[note 2](£ millions)".
df = (
    pd.read_excel(
        "publicsectorcurrentreceiptsappendixdfinal.xlsx",
        sheet_name="Time Series",
        skiprows=4,
    )
    .iloc[1:, :]
    .rename(columns=lambda c: c.replace("\n", "").replace("  ", " "))
)
# Indirect-tax columns that are currently folded into the "Other" category.
# To chart them as their own "Other indirect taxes" category instead, give this
# list its own key in `category_mapping` and drop it from "Other".
_other_indirect_taxes = [
  "Fuel duties (£ millions)",
  "Stamp duty land tax (£ millions)",
  "Stamp taxes on shares (£ millions)",
  "Tobacco duties (£ millions)",
  "Alcohol duties - total (£ millions)",
  "Customsduties (£ millions)",
  "Vehicle excise duties paid by businesses (£ millions)",
  "Vehicle excise duties paid by households (£ millions)",
  "Other taxes on production - total (£ millions)",
  "Bank levy (£ millions)",
  "Television Licence fee receipts (£ millions)",
]

# Chart category -> source columns (after header clean-up) that are summed into it.
category_mapping = {
  "Income tax": ["Income tax (gross of tax credits) - total (£ millions)"],
  "National Insurance": ["Social contributions [note 2](£ millions)"],
  "VAT": ["Value added tax - total (£ millions)"],
  "Company taxes": [
    "Corporation tax - total (£ millions)",
    "Petroleum revenue tax (£ millions)",
  ],
  "Business rates & council tax": [
    "Business rates (£ millions)",
    "Council tax (£ millions)",
  ],
  "Other": [
    "Miscellaneous other taxes - total (£ millions)",
    "Miscellaneous taxes on income and wealth - total (£ millions)",
    "Other central government taxes (£ millions)",
    "Other local government taxes (£ millions)",
    "Public sector interest and dividend receipts - total (£ millions)",
    "Public sector gross operating surplus (£ millions)",
    "Other public sector receipts (£ millions)",
    *_other_indirect_taxes,
  ],
}

# Aggregate the receipts columns into one total per chart category and year.
#
# Step 1: for every "Time period", sum the source columns belonging to each category.
period_groups = df.groupby("Time period")
category_totals = {}
for category, columns in category_mapping.items():
    existing_columns = [col for col in columns if col in df.columns]
    missing_columns = [col for col in columns if col not in df.columns]
    if missing_columns:
        # A header that does not match would otherwise be dropped silently and
        # the category total would be too low without any visible sign.
        warnings.warn(
            f"{category!r}: columns not found in the sheet and left out of the total: "
            f"{missing_columns}"
        )
    if existing_columns:
        category_totals[category] = period_groups[existing_columns].sum().sum(axis=1)

# Step 2: bring "Time period" back as the first column.
# The labels are taken from the groupby index so each label stays attached to its
# own row. groupby returns the periods in sorted label order, whereas
# df["Time period"].unique() returns them in order of appearance, so attaching
# the latter by position would mislabel rows.
summed_df = pd.DataFrame(category_totals).rename_axis("Time period").reset_index()

# Step 3: reshape to long form (one row per period and series), reduce each
# "YYYY Mon" period label to its calendar year, and total the values per year and series.
summed_df = (
    summed_df
    .melt(id_vars="Time period", var_name="series", value_name="value")
    .assign(year=lambda d: pd.to_datetime(d["Time period"], format="%Y %b").dt.year)
    .drop(columns=["Time period"])
    .groupby(["year", "series"])
    .sum()
    .reset_index()
)

# Downstream code works on `df`; keep `summed_df` as an untouched copy of the yearly totals.
df = summed_df.copy()

# Rank the series by their 2023 total, largest first. The rank is stored in
# `order`, which the chart uses to order the layers.
sort_order = (
    df.loc[df["year"] == 2023]
    .sort_values("value", ascending=False)["series"]
    .to_list()
)
df['order'] = df['series'].map(sort_order.index)

# GDP lookup table. Its `date` column is renamed to `year` so it can be joined on
# the integer year computed above.
# NOTE(review): assumes gdp.xlsx holds plain integer years in `date` and a `gdp` column — confirm.
gdp_df = pd.read_excel("gdp.xlsx")
gdp_df = gdp_df.rename(columns={"date": "year"})

# Attach GDP, keep years before 2024, and express receipts as a share of GDP.
# `value` becomes the share; the raw figure is kept as `public_sector_receipts`.
# `year` is converted to a timestamp last, after the integer comparisons above.
df = (
    df.merge(gdp_df, on="year", how="left")
    .loc[lambda d: d["year"] < 2024]
    .rename(columns={"value": "public_sector_receipts"})
    .assign(
        value=lambda d: d["public_sector_receipts"] / d["gdp"],
        year=lambda d: pd.to_datetime(d["year"], format="%Y"),
    )
)

# Fig 1: area chart of each series' receipts as a share of GDP over time.

# Percent tick labels; the 45% tick additionally carries the " of GDP" unit.
share_axis = alt.Axis(labelExpr="format(datum.value, '.0%') + (datum.value == 0.45 ? ' of GDP' : '')")

area_encodings = dict(
    x=alt.X("year:T", title=""),
    y=alt.Y("value:Q", title="", axis=share_axis),
    color=alt.Color("series:N", title="Category", legend=None),
    tooltip=["year", "value"],
    order=alt.Order("order:O"),
)

chart = (
    alt.Chart(df)
    .mark_area()
    .encode(**area_encodings)
    .properties(title="", width=500)
)

# Per-series vertical pixel offsets for the text labels, looked up by series name
# inside the Vega expression. These are hand-tuned values.
label_dy = alt.expr("""
                          {"VAT": -130, "National Insurance": -180, "Income tax": 5, "Company taxes": -230, "Other indirect taxes": -200, "Business rates & council tax": -250, "Other": -60}[datum.series]
                                     """)
label_mark_options = dict(align='left', baseline='middle', dx=5, dy=label_dy)

# The legend is disabled, so each series is named by a text label placed at the
# 2023 data point, just right of the area.
labels = (
    chart
    .transform_filter("year(datum.year) == 2023")
    .mark_text(**label_mark_options)
    .encode(
        x=alt.X("year:T"),
        y=alt.Y("value:Q"),
        text=alt.Text("series:N"),
    )
)

chart = chart + labels

# Export a PNG rendered at 3x scale and the chart's JSON specification.
chart.save("public_sector_receipts.png", scale_factor=3.0)
chart.save("public_sector_receipts.json")

chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# Fig 2: GDP growth by quarter

In [132]:
# Load the GDP growth figures, keep the three columns the chart needs, and give
# each row a bar colour based on the sign of `value`:
#   > 0 -> "#36B7B4" (teal), == 0 -> "#727272" (grey), anything else -> "#E6224B" (red).
df = (
    pd.read_excel("gdp_growth.xlsx")
    .loc[:, ['date_str', 'date', 'value']]
    .assign(
        color=lambda d: np.select(
            [d['value'] > 0, d['value'] == 0],
            ["#36B7B4", "#727272"],
            default="#E6224B",
        )
    )
)

# Bar chart of GDP growth, one fixed-width bar per quarter.

# The x scale is capped at 28 June 2024.
quarter_scale = alt.Scale(domainMax=alt.DateTime(year=2024, month=6, date=28))

bars = (
    alt.Chart(df)
    .mark_bar(width=30)
    .encode(
        x=alt.X("yearquarter(date):T", title="", scale=quarter_scale),
        y=alt.Y("value:Q", title=""),
        tooltip=["date", "value"],
        # scale=None: the `color` column already holds literal colour codes.
        color=alt.Color("color:N", scale=None),
    )
    .properties(title="", width=500)
)

# Value labels: above the bar for positive values, below it otherwise.
value_label_options = dict(
    align='center',
    dx=20,
    dy=alt.expr("datum.value > 0 ? -3 : 3"),
    text=alt.expr("datum.value + '%'"),
    baseline=alt.expr("datum.value > 0 ? 'bottom' : 'top'"),
)

labels = bars.mark_text(**value_label_options).encode(
    x=alt.X("yearquarter(date):T"),
    # This layer also supplies the "%" suffix for the y-axis tick labels.
    y=alt.Y("value:Q", axis=alt.Axis(labelExpr="datum.label + '%'")),
)

chart = bars + labels

# Export a PNG rendered at 3x scale and the chart's JSON specification.
chart.save("gdp_growth.png", scale_factor=3.0)
chart.save("gdp_growth.json")

chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [36]:
# Quick inspection of the columns currently held in `df`.
df.columns

Index(['year', 'series', 'value'], dtype='object')

In [12]:
# NOTE(review): exploratory lookup that raised the KeyError recorded in this cell's output.
# `category_mapping` spells this header without the space before "(£ millions)".
# This looks like leftover debugging — consider removing the cell so the notebook runs top to bottom.
df['Social contributions [note 2] (£ millions)']

KeyError: 'Social contributions [note 2] (£ millions)'