<a href="https://colab.research.google.com/github/hcantekin/hcantekin.github.io/blob/main/gdp_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [32]:
!pip install vl-convert-python



In [33]:
!pip install altair-saver



In [59]:
import pandas as pd # basic data manipulation
import re
import vl_convert
import altair as alt # charts
import eco_style # the Economics Observatory custom style
import numpy as np # numerical operations
import requests # fetching data from the web
import json # parsing JSON
import os # file operations
alt.themes.enable('light')

ThemeRegistry.enable('light')

You will need to download font file under 'fonts' if not already on your device, otherwise the png's will be missing all text.

In [60]:
# Download and register the fonts
os.makedirs("fonts", exist_ok = True)
req = requests.get("https://github.com/midianinja/som-web/blob/master/public/fonts/CircularStd-Book.otf?raw=true")
with open('fonts/CircularStd-Book.otf', 'wb') as f:
  f.write(req.content)

vl_convert.register_font_directory("fonts")

TO-DO: Change the name of the file for path and path2 to match uploaded doc name, only the year/month should change (note the files are xlxs).
You will also need the eco styling doc uploaded (eco_style.py).

In [61]:
# GDP tables data
path = 'monthlygdptablesoct2024.xlsx'
data = pd.read_excel(path, sheet_name = 'GVA', skiprows = 4)


In [62]:
# Contributions data
path2 = 'monthlycontributionstablesoct2024.xlsx'
data2= pd.read_excel(path2, sheet_name = 'CONTRIBUTIONS', skiprows = 4)

Name of the bulletin dataset doesn't change but new file still needs to be uploaded.

In [63]:
path3 = 'bulletindataset2.xlsx'
data3 = pd.read_excel(path3, sheet_name = 'Table 3a', skiprows = 4)
data4 = pd.read_excel(path3, sheet_name = 'Table 3b',  skiprows = 4)

NB: First two bar charts on GVA use an API instead of Excel doc so should update automatically when ONS releases new data.

In [64]:
url = 'https://api.beta.ons.gov.uk/v1/data?uri=/economy/grossdomesticproductgdp/timeseries/ecyx/mgdp'

# https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/ecyx/mgdp

headline1_req = requests.get(url) # Request the data
gva1_data = json.loads(headline1_req.text) # Parse the JSON
gva1_data.keys() # Check the keys - what options do we have?

dict_keys(['years', 'quarters', 'months', 'sourceDatasets', 'relatedDatasets', 'relatedDocuments', 'versions', 'type', 'uri', 'description'])

In [65]:
df = pd.DataFrame(gva1_data['months']) # Convert the data to a DataFrame


df['date'] = pd.to_datetime(df['date']) # Fixing the date format

df['value'] = df['value'].astype(float)/100 # Expressing percentages as [0,1] rather than [0,100]

df = df.query("date >= '2023-10-01'") # Filter the data to 2021 onwards

df['highlight'] = False
df.iloc[-1, df.columns.get_loc('highlight')] = True

tolerance = 1.03

chart = alt.Chart(df).mark_bar(color = '#707070').encode(
    x=alt.X('yearmonth(date):T', title=''),
    y=alt.Y('value:Q', title='Total GVA, monthly change, basic prices',axis=alt.Axis(format='%'), scale=alt.Scale(domain=[df.value.min()*tolerance, df.value.max()*tolerance]))
).properties(
    title=''
)

zero_line = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color="#676A86", strokeWidth=0.6).encode(
    y='y:Q'
)

highlight = chart.transform_filter(alt.datum.highlight).mark_bar(
    color='#36B7B4'
)

text = chart.mark_text(
    align='center',
    opacity=0.8,
    # "#676A86"
    baseline='middle',
    dx=16,  # Adjust horizontal position
    dy=alt.expr("datum.value < 0 ? 10 : -10")
).encode(
     color=alt.condition(
        alt.datum.highlight,  # Highlight condition
        alt.value('#36B7B4'),  # Blue for highlighted bars
        alt.value('#707070')  # Grey for others
    ),
    text=alt.Text('value:Q', format='.1%')  # Format as percentage
)

image = alt.Chart(pd.DataFrame([{'url': 'https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/refs/heads/main/guidelines/logos/eco-logo-light.png'}])).mark_image(
    height=30,
    width=90,
    opacity=0.5,
    # align='right',
    baseline='bottom'
).encode(
    url='url',
    x=alt.value(380),
    y=alt.value(-10)
)

chart = chart + zero_line + text + highlight + image
os.makedirs("charts", exist_ok=True)
chart.save("charts/gva_month.json")
chart.save("charts/gva_month.png", scale_factor=2.0)
chart.save("charts/gva_month.svg")
chart

  df['date'] = pd.to_datetime(df['date']) # Fixing the date format
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['highlight'] = False


In [66]:
url = 'https://api.beta.ons.gov.uk/v1/data?uri=/economy/grossdomesticproductgdp/timeseries/ed2r/mgdp'

# https://www.ons.gov.uk/economy/grossdomesticproductgdp/timeseries/ecyx/mgdp

headline2_req = requests.get(url) # Request the data
gva2_data = json.loads(headline2_req.text) # Parse the JSON
gva2_data.keys() # Check the keys - what options do we have?


dict_keys(['years', 'quarters', 'months', 'sourceDatasets', 'relatedDatasets', 'relatedDocuments', 'versions', 'type', 'uri', 'description'])

In [67]:
df2 = pd.DataFrame(gva2_data['months']) # Convert the data to a DataFrame


df2['date'] = pd.to_datetime(df2['date']) # Fixing the date format
df2['value'] = df2['value'].astype(float)/100 # Expressing percentages as [0,1] rather than [0,100]

df2 = df2.query("date >= '2023-10-01'") # Filter the data to 2021 onwards

df2['highlight'] = False
df2.iloc[-1, df2.columns.get_loc('highlight')] = True

tolerance = 1.03

chart2 = alt.Chart(df2).mark_bar(color = '#707070').encode(
    x=alt.X('yearmonth(date):T', title=''),
    y=alt.Y('value:Q', title='Total GVA, year-on-year change, basic prices',axis=alt.Axis(format='%'), scale=alt.Scale(domain=[df2.value.min()*tolerance, df2.value.max()*tolerance])),
    # color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#E6224B']), legend=None)
).properties(
    title=''
)

highlight = chart2.transform_filter(alt.datum.highlight).mark_bar(
    color='#36B7B4'
)

zero_line = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color="#676A86", strokeWidth=0.6).encode(
    y='y:Q'
)

text = chart2.mark_text(
    align='center',
    color="#36B7B4",
    opacity=0.8,
    # "#676A86"
    baseline='middle',
    dx=16,  # Adjust horizontal position
    dy=alt.expr("datum.value < 0 ? 10 : -10")
).encode(
     color=alt.condition(
        alt.datum.highlight,  # Highlight condition
        alt.value('#36B7B4'),  # Blue for highlighted bars
        alt.value('#707070')  # Grey for others
    ),
    text=alt.Text('value:Q', format='.1%')  # Format as percentage
)

image = alt.Chart(pd.DataFrame([{'url': 'https://raw.githubusercontent.com/EconomicsObservatory/ECOvisualisations/refs/heads/main/guidelines/logos/eco-logo-light.png'}])).mark_image(
    height=30,
    width=90,
    opacity=0.5,
    # align='right',
    baseline='bottom'
).encode(
    url='url',
    x=alt.value(380),
    y=alt.value(-10)
)


chart2 = chart2 + zero_line + text + highlight + image
chart2.save("charts/gva_yoy.json")
chart2.save("charts/gva_yoy.png", scale_factor=2.0)
chart2.save("charts/gva_yoy.svg")
chart2

  df2['date'] = pd.to_datetime(df2['date']) # Fixing the date format


In [68]:
# filtering and cleaning data
# filtering data to monthly data and making sure date is datetime format
monthly_data = data[data['Category'].str.strip().str.casefold() == 'Percentage change, latest month on previous month'.casefold()]

monthly_data['Time Period'] = pd.to_datetime(monthly_data['Time Period'], format='%Y %b', errors='coerce')

# Filter the most recent row (so latest release)
most_recent_row = monthly_data.loc[[monthly_data['Time Period'].idxmax()]]
recent_long = most_recent_row.melt(id_vars=['Time Period'], var_name='type', value_name='value')

# filtering to cols we are interested in (in this case services)
start_index = recent_long[recent_long['type'] == 'Total service industries (G-T)'].index[0]
# Slice the DataFrame to include rows after this index
services_data_month = recent_long.iloc[start_index:]

# Remove letters in brackets from the 'type' column
services_data_month['type'] = services_data_month['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)

# print(services_data_month.type.unique())

# function to clean up text
def split_text(text, limit):
    """
    Splits a long text into multiple lines, ensuring no line exceeds the limit.

    Args:
        text (str): The input text to split.
        limit (int): The maximum allowed line length.

    Returns:
        list: A list of strings split within the limit.
    """

    rebinds = {
        "Activities of households as employers, undifferentiated goods and services": "Household employers; other goods & services",
        "Wholesale and retail: repair of motor vehicles and motorcycles": "Wholesale & retail; repair of vehicles",
    }

    text = rebinds.get(text, text)

    text = text.replace(' and ', ' & ')

    text = re.sub(r'\s*\[[^]]+\]', '', text)

    text = text.replace("activities", "").replace("activites", "")

    words = text.split()
    lines = []
    current_line = ""

    for word in words:
        # Check if adding the next word would exceed the limit
        if len(current_line) + len(word) + (1 if current_line else 0) > limit:
            # Append the current line to the list
            lines.append(current_line)
            current_line = word  # Start a new line with the current word
        else:
            # Add word to the current line (with a space if needed)
            current_line += (" " if current_line else "") + word

    # Add the last line if it's not empty
    if current_line:
        lines.append(current_line)

    return lines

services_data_month.columns = [c.strip() for c in services_data_month.columns] # The column names have some extra spaces that we remove - a common issue with some ONS releases

services_data_month['value'] = services_data_month['value'].astype(float)/100

services_data_month['highlight'] = False
services_data_month.loc[
    services_data_month['type'] == 'Total service industries', 'highlight'
] = True

services_data_month['type'] = services_data_month['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
services_data_month['zero_dummy'] = 0

chart = alt.Chart(services_data_month).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
               labelBaseline= 'middle',
               labels=False,
                domain=False,
                labelLimit=9999999)),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.019),
            axis=alt.Axis(
                labelLimit=200,
        format='%', offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart.mark_bar()
labels = chart.mark_text(
    opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.1%')")
)

type_labels = chart.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart = bars + labels + image.encode(x=alt.value(390), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart = chart.properties(
    width=350,
    height=400
).configure_bar(
    height=25,
)

chart.save("charts/services_month.json")
chart.save("charts/services_month.png", scale_factor=2.0)
chart.save("charts/services_month.svg")
chart


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  monthly_data['Time Period'] = pd.to_datetime(monthly_data['Time Period'], format='%Y %b', errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  services_data_month['type'] = services_data_month['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-co

In [69]:
# ['Total service industries'
#  'Wholesale and retail: repair of motor vehicles and motorcycles'
#  'Transport and storage' 'Accommodation and food service activites'
#  'Information and communication' 'Financial and insurance activities'
#  'Real estate activites'
#  'Professional, scientific and technical activities'
#  'Administrative and support service activities'
#  'Public administration and defence' 'Education'
#  'Human health and social work activities'
#  'Arts, entertainment and recreation' 'Other service activities'
#  'Activities of households as employers, undifferentiated goods and services']


Note: if you get an attribute error running above cell, try running all cells again.

In [70]:
# repreating previous filtering/cleaning steps for year-on-year
yoy_data = data[data['Category'].str.strip().str.casefold() == 'Percentage change, latest month on same month a year ago'.casefold()]
yoy_data['Time Period'] = pd.to_datetime(yoy_data['Time Period'], format='%Y %b', errors='coerce')

# Filter the most recent row
most_recent_row2 = yoy_data.loc[[yoy_data['Time Period'].idxmax()]]
recent_long2 = most_recent_row2.melt(id_vars=['Time Period'], var_name='type', value_name='value')
start_index2 = recent_long2[recent_long2['type'] == 'Total service industries (G-T)'].index[0]

# Slice the DataFrame to include rows after this index
services_data_yoy = recent_long2.iloc[start_index2:]

# Remove letters in brackets from the 'type' column
services_data_yoy['type'] = services_data_yoy['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)

# Highlight the row where 'type' is 'Total service industries'
services_data_yoy.columns = [c.strip() for c in services_data_yoy.columns] # The column names have some extra spaces that we remove - a common issue with some ONS releases

services_data_yoy['value'] = services_data_yoy['value'].astype(float)/100

services_data_yoy['highlight'] = False
services_data_yoy.loc[
    services_data_yoy['type'] == 'Total service industries', 'highlight'
] = True

services_data_yoy['type'] = services_data_yoy['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
services_data_yoy['zero_dummy'] = 0

services_data_yoy = services_data_yoy.sort_values('value', ascending=True)  # Sort the data by the value column

chart2 = alt.Chart(services_data_yoy).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
                labelBaseline='middle',
                labels =False,
                domain=False,
                labelLimit=9999999)),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.029),
            axis=alt.Axis(

                labelLimit=200,
        format='%', offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart2.mark_bar()

labels = chart2.mark_text(
     opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.1%')")
)

type_labels = chart2.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart2 = bars + labels + image.encode(x=alt.value(300), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart2 = chart2.properties(
    width=300,
    height=400
).configure_bar(
    height=25,
)

chart2.save("charts/services_yoy.json")
chart2.save("charts/services_yoy.png", scale_factor=2.0)
chart2.save("charts/services_yoy.svg")

chart2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yoy_data['Time Period'] = pd.to_datetime(yoy_data['Time Period'], format='%Y %b', errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  services_data_yoy['type'] = services_data_yoy['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  service

In [71]:
# filtering to cols we are interested in (in this case production)
start_index = recent_long[recent_long['type'] == 'Total production industries (B - E)'].index[0]
# Slice the DataFrame to include rows after this index
prod_data_month = recent_long.iloc[start_index:8]

# Remove letters in brackets from the 'type' column
prod_data_month['type'] = prod_data_month['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
prod_data_month['value'] = prod_data_month['value'].astype(float)/100

prod_data_month['highlight'] = False
prod_data_month.loc[
    prod_data_month['type'] == 'Total production industries', 'highlight'
] = True

prod_data_month['type'] = prod_data_month['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
prod_data_month['zero_dummy'] = 0

prod_data_month = prod_data_month.sort_values('value', ascending=True)  # Sort the data by the value column

chart3 = alt.Chart(prod_data_month).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
                labelBaseline='middle',
                labels =False,
                domain=False,
                labelLimit=9999999)),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.029),
            axis=alt.Axis(

                labelLimit=200,
        format='%', offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart3.mark_bar()

labels = chart3.mark_text(
     opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.1%')")
)

type_labels = chart3.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart3 = bars + labels + image.encode(x=alt.value(300), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart3 = chart3.properties(
    width=300,
    height=200
).configure_bar(
)

chart3.save("charts/production_month.json")
chart3.save("charts/production_month.png", scale_factor=2.0)
chart3.save("charts/production_month.svg")

chart3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prod_data_month['type'] = prod_data_month['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prod_data_month['value'] = prod_data_month['value'].astype(float)/100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prod_data_month['highlight'] = False
A v

In [72]:
# Filter the most recent row
most_recent_row2 = yoy_data.loc[[yoy_data['Time Period'].idxmax()]]
recent_long2 = most_recent_row2.melt(id_vars=['Time Period'], var_name='type', value_name='value')
start_index2 = recent_long2[recent_long2['type'] == 'Total production industries (B - E)'].index[0]

# Slice the DataFrame to include rows after this index
prod_data_yoy = recent_long2.iloc[start_index2:8]

# Remove letters in brackets from the 'type' column
prod_data_yoy['type'] = prod_data_yoy['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)

# Highlight the row where 'type' is 'Total service industries'
prod_data_yoy.columns = [c.strip() for c in prod_data_yoy.columns] # The column names have some extra spaces that we remove - a common issue with some ONS releases

prod_data_yoy['value'] = prod_data_yoy['value'].astype(float)/100

prod_data_yoy['highlight'] = False
prod_data_yoy.loc[
    prod_data_yoy['type'] == 'Total production industries', 'highlight'
] = True

prod_data_yoy['type'] = prod_data_yoy['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
prod_data_yoy['zero_dummy'] = 0

prod_data_yoy = prod_data_yoy.sort_values('value', ascending=True)  # Sort the data by the value column

chart4 = alt.Chart(prod_data_yoy).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
                labelBaseline='middle',
                labels =False,
                domain=False,
                labelLimit=9999999)),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.05),
            axis=alt.Axis(

                labelLimit=200,
        format='%', offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart4.mark_bar()

labels = chart4.mark_text(
     opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.1%')")
)

type_labels = chart4.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart4 = bars + labels + image.encode(x=alt.value(340), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart4 = chart4.properties(
    width=300,
    height=200
).configure_bar(
)

chart4.save("charts/production_yoy.json")
chart4.save("charts/production_yoy.png", scale_factor=2.0)
chart.save("charts/production_yoy.svg")

chart4

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prod_data_yoy['type'] = prod_data_yoy['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prod_data_yoy['value'] = prod_data_yoy['value'].astype(float)/100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prod_data_yoy['highlight'] = False
A value is tr

In [73]:
# monthly_data2 = monthly_data.rename(columns={'Time Period': 'date'})
# monthly_data2['date'] = pd.to_datetime(monthly_data2['date'])
# monthly_data2 = monthly_data2.query("date >= '2023-10-01'")
# prod_monthly_data = monthly_data2.iloc[:, [0, 5, 6, 7, 8]]
# prod_monthly_data_total = monthly_data2.iloc[:, [0, 4]]
# prod_monthly_long = prod_monthly_data.melt(id_vars=['date'], var_name='type', value_name='value')
# prod_monthly_long['type'] = prod_monthly_long['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
# prod_monthly_long_total = prod_monthly_data_total.melt(id_vars=['date'], var_name='type', value_name='value')
# prod_monthly_long_total['type'] = prod_monthly_long_total['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)

# most_recent_date = prod_monthly_long_total['date'].max()
# most_recent_value = prod_monthly_long_total.loc[prod_monthly_long_total['date'] == most_recent_date, 'value'].iloc[0]

# # prod_monthly_long['value'] = prod_monthly_long['value'].astype(float)/100

# chart = alt.Chart(prod_monthly_long).mark_bar(width = 30, opacity=0.6).encode(
#     x=alt.X('date:T', title='', scale=alt.Scale(padding=16), axis=alt.Axis(format='%b-%Y')),
#     y=alt.Y('value:Q', title='',
#             axis=alt.Axis(labelExpr="format(datum.value,',') + '%'"),
#             #  scale=alt.Scale(domain=[-0.5, 0.5]),
#               stack='zero'),
#     color=alt.Color('type:N', sort=alt.Sort('ascending'),
#                     scale=alt.Scale(
#                         scheme="observables10",
#                     ),
#                     legend=alt.Legend(orient='top', direction='horizontal', title='')),
#      tooltip = ['date:T', 'value:Q', 'type:N']

# ).properties(
#      width= 400,
#         height=300
# )

# total = alt.Chart(prod_monthly_long_total).mark_line(interpolate='monotone', color = 'black', strokeWidth= 2).encode(
#     x=alt.X('date:T', title=''),
#     y=alt.Y('value:Q', title='',
#             axis=alt.Axis(labelExpr="format(datum.value,',') + '%'")),
#     tooltip = ['date:T', 'value:Q']

# )

# points_total = alt.Chart(prod_monthly_long_total).mark_point(filled=True, color='black', size=30, opacity=1).encode(
#     x=alt.X('date:T', title=''),
#     y=alt.Y('value:Q', title=''),
#     tooltip=['date:Q', 'value:Q']
# )

# total_text = (
#     alt.Chart(pd.DataFrame({
#         'date': [most_recent_date],
#         'value': [most_recent_value],
#         'label': [f"Production industries: {most_recent_value}%"]
#     }))
#     .mark_text(align='right', dx=170, dy=0, color='black', size=12)
#     .encode(
#         x='date:T',
#         y='value:Q',
#         text='label:N'
#     )
# )

# chart10 =  chart + total + points_total + total_text
# chart10.save("charts/production_month_stack.json")
# chart10.save("charts/production_month_stack.png", scale_factor=2.0)
# chart10.save("charts/production_month_stack.svg")
# chart10

In [74]:
# yoy_data = data[data['Category'].str.strip().str.casefold() == 'Percentage change, latest month on same month a year ago'.casefold()]
# yoy_data2 = yoy_data.rename(columns={'Time Period': 'date'})
# yoy_data2['date'] = pd.to_datetime(yoy_data2['date'])
# yoy_data2 = yoy_data2.query("date >= '2023-10-01'")
# prod_yoy_data = yoy_data2.iloc[:, [0, 5, 6, 7, 8]]
# prod_yoy_data_total = yoy_data2.iloc[:, [0, 4]]
# prod_yoy_long = prod_yoy_data.melt(id_vars=['date'], var_name='type', value_name='value')
# prod_yoy_long['type'] = prod_yoy_long['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
# prod_yoy_long_total = prod_yoy_data_total.melt(id_vars=['date'], var_name='type', value_name='value')
# prod_yoy_long_total['type'] = prod_yoy_long_total['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)

# most_recent_date = prod_yoy_long_total['date'].max()
# most_recent_value = prod_yoy_long_total.loc[prod_yoy_long_total['date'] == most_recent_date, 'value'].iloc[0]

# # prod_monthly_long['value'] = prod_monthly_long['value'].astype(float)/100

# chart = alt.Chart(prod_yoy_long).mark_bar(
#     opacity=1
# ).encode(
#     # x=alt.X('date:T', title='', scale=alt.Scale(padding=16), axis=alt.Axis(format='%b %Y')),
#     x = alt.X('yearmonth(date):T',scale=alt.Scale(), axis=alt.Axis(format='%b %Y')),
#     y=alt.Y('value:Q', title='',
#             axis=alt.Axis(labelExpr="format(datum.value,',') + '%'"),
#             #  scale=alt.Scale(domain=[-0.5, 0.5]),
#               stack='zero'),
#     color=alt.Color('type:N', sort=alt.Sort('ascending'),
#                     legend=alt.Legend(orient='top', direction='horizontal', title=''),
#                     scale=alt.Scale(
#              range=[
#             "#A3E6E4",  # Pastel of #36B7B4
#             # "#F597A6",  # Pastel of #E6224B
#             # "#F9E3A8",  # Pastel of #F4C245
#             "#7DB8E0",  # Pastel of #0063AF
#             "#80D4B0",  # Pastel of #00A767
#             "#89D3F4",  # Pastel of #179FDB
#             "#F7A485",  # Pastel of #EB5C2E
#         ])),
#      tooltip = ['date:T', 'value:Q', 'type:N']

# ).properties(
#      width= 400,
#         height=300
# )

# prod_yoy_long_total['date'] = prod_yoy_long_total['date'].dt.strftime('%Y-%m')+"-14"
# total = alt.Chart(prod_yoy_long_total).mark_line(interpolate='monotone', color = '#707070', strokeWidth= 1.5).encode(
#     x=alt.X('date:T', title=''),
#     y=alt.Y('value:Q', title='',
#             axis=alt.Axis(labelExpr="format(datum.value,',') + '%'")),
#     tooltip = ['date:T', 'value:Q']

# )

# points_total = alt.Chart(prod_yoy_long_total).mark_point(filled=True, color='#707070', size=30, opacity=1).encode(
#     x=alt.X('date:T', title=''),
#     y=alt.Y('value:Q', title=''),
#     tooltip=['date:Q', 'value:Q']
# )

# total_text = (
#     alt.Chart(pd.DataFrame({
#         'date': [most_recent_date],
#         'value': [most_recent_value],
#         'label': [f"Production industries: {most_recent_value}%"]
#     }))
#     .mark_text(align='right', dx=185, dy=0, color='#707070', size=12)
#     .encode(
#         x='date:T',
#         y='value:Q',
#         text='label:N'
#     )
# )
# zero_line = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color='#707070', strokeWidth=1, opacity=0.5).encode(
#     y=alt.Y('y:Q')
# )

# chart20 =  zero_line + chart + total + points_total + total_text + image.encode(x=alt.value(460), y=alt.value(-35), opacity=alt.value(0.5))
# chart20.save("charts/production_yoy_stack.json")
# chart20.save("charts/production_yoy_stack.png", scale_factor=2.0)
# chart20.save("charts/production_yoy_stack.svg")
# chart20

In [75]:
yoy_data = data[data['Category'].str.strip().str.casefold() == 'Percentage change, latest month on same month a year ago'.casefold()]
yoy_data2 = yoy_data.rename(columns={'Time Period': 'date'})
yoy_data2['date'] = pd.to_datetime(yoy_data2['date'])
yoy_data2 = yoy_data2.query("date >= '2023-10-01'")
prod_yoy_data = yoy_data2.iloc[:, [0, 5, 6, 7, 8]]
prod_yoy_data_total = yoy_data2.iloc[:, [0, 4]]
prod_yoy_long = prod_yoy_data.melt(id_vars=['date'], var_name='type', value_name='value')
prod_yoy_long['type'] = prod_yoy_long['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
prod_yoy_long_total = prod_yoy_data_total.melt(id_vars=['date'], var_name='type', value_name='value')
prod_yoy_long_total['type'] = prod_yoy_long_total['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)

most_recent_date = prod_yoy_long_total['date'].max()
most_recent_value = prod_yoy_long_total.loc[prod_yoy_long_total['date'] == most_recent_date, 'value'].iloc[0]

# prod_monthly_long['value'] = prod_monthly_long['value'].astype(float)/100

chart = alt.Chart(prod_yoy_long).mark_bar(
    opacity=1
).encode(
    # x=alt.X('date:T', title='', scale=alt.Scale(padding=16), axis=alt.Axis(format='%b %Y')),
    x = alt.X('yearmonth(date):T',scale=alt.Scale(), axis=alt.Axis(format='%b %Y')),
    y=alt.Y('value:Q', title='',
            axis=alt.Axis(labelExpr="format(datum.value,',') + '%'"),
            #  scale=alt.Scale(domain=[-0.5, 0.5]),
              stack='zero'),
    color=alt.Color('type:N', sort=alt.Sort('ascending'),
                    legend=alt.Legend(orient='top', direction='horizontal', title=''),
                    # scale=alt.Scale(scheme='pastel1')),
                    scale=alt.Scale(
            range=[
            "#A3E6E4",  # Pastel of #36B7B4
            # "#F597A6",  # Pastel of #E6224B
            "#F9E3A8",  # Pastel of #F4C245
            "#7DB8E0",  # Pastel of #0063AF
            "#80D4B0",  # Pastel of #00A767
            "#89D3F4",  # Pastel of #179FDB
            "#F7A485",  # Pastel of #EB5C2E
        ])),
     tooltip = ['date:T', 'value:Q', 'type:N']

).properties(
     width= 400,
        height=300
)

prod_yoy_long_total['date'] = prod_yoy_long_total['date'].dt.strftime('%Y-%m')+"-14"
total = alt.Chart(prod_yoy_long_total).mark_line(interpolate='monotone', color = '#707070', strokeWidth= 1.5).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title='',
            axis=alt.Axis(labelExpr="format(datum.value,',') + '%'")),
    tooltip = ['date:T', 'value:Q']

)

points_total = alt.Chart(prod_yoy_long_total).mark_point(filled=True, color='#707070', size=30, opacity=1).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title=''),
    tooltip=['date:Q', 'value:Q']
)

total_text = (
    alt.Chart(pd.DataFrame({
        'date': [most_recent_date],
        'value': [most_recent_value],
        'label': [f"Production industries: {most_recent_value}%"]
    }))
    .mark_text(align='right', dx=185, dy=0, color='#707070', size=12)
    .encode(
        x='date:T',
        y='value:Q',
        text='label:N'
    )
)
zero_line = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color='#707070', strokeWidth=1, opacity=0.5).encode(
    y=alt.Y('y:Q')
)

chart20 =  zero_line + chart + total + points_total + total_text + image.encode(x=alt.value(460), y=alt.value(-35), opacity=alt.value(0.5))
chart20.save("charts/production_yoy_stack.json")
chart20.save("charts/production_yoy_stack.png", scale_factor=2.0)
chart20.save("charts/production_yoy_stack.svg")
chart20

  yoy_data2['date'] = pd.to_datetime(yoy_data2['date'])


In [76]:
# filtering data to monthly data and making sure date is datetime format
monthly_data20 = data2[
data2['Category'].str.strip().str.casefold() == 'contributon to growth, latest month on previous month'.casefold()]

monthly_data20 = monthly_data20[
    monthly_data20['Time Period'].notna() &  # Ensure 'Time Period' is not NA
    (monthly_data20['Time Period'] != '[Not applicable]')  # Exclude rows with specific unwanted value
]
monthly_data20['Time Period'] = pd.to_datetime(monthly_data20['Time Period'], format='%Y %b')

cont_month_data = monthly_data20.iloc[:, [0, 2, 4, 9, 10]]
cont_month_data_long = cont_month_data.melt(id_vars=['Time Period'], var_name='type', value_name='value')
cont_month_data_long = cont_month_data_long.rename(columns={'Time Period': 'date'})
cont_month_data_long = cont_month_data_long.query("date >= '2023-10-01'")
cont_month_data_long['type'] = cont_month_data_long['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
cont_month_data_long['type'] = cont_month_data_long['type'].str.strip()

cont_month_data_long['type'] = cont_month_data_long['type'].replace({
    'Total GVA at basic prices': 'GDP',  # Replace 'Total GVA' with 'GDP'
    'Total service industries': 'Services',
    'Total production industries': 'Production',
    'Construction [note 5]' : 'Construction'
    # Add more replacements as needed
})

# splitting data so we have separate df for gdp which will be line not bars
cont_month_gdp = cont_month_data_long[cont_month_data_long['type'] == 'GDP'].reset_index(drop=True)  # Rows with 'type' == 'GDP'
cont_month_data_long = cont_month_data_long[cont_month_data_long['type'] != 'GDP'].reset_index(drop=True)

most_recent_date = cont_month_gdp['date'].max()
most_recent_value = cont_month_gdp.loc[cont_month_gdp['date'] == most_recent_date, 'value'].iloc[0]

chart = alt.Chart(cont_month_data_long).mark_bar(
).encode(
    # x=alt.X('date:T', title='', scale=alt.Scale(padding=16), axis=alt.Axis(format='%b %Y')),
    x = alt.X('yearmonth(date):T',scale=alt.Scale(), axis=alt.Axis(format='%b %Y')),
    y=alt.Y('value:Q', title='',
            axis=alt.Axis(labelExpr="format(datum.value,',') + '%'"),
            #  scale=alt.Scale(domain=[-0.5, 0.5]),
              stack='zero'),
    color=alt.Color('type:N', sort=alt.Sort('ascending'),
                    legend=alt.Legend(orient='top', direction='horizontal', title=''),
                     scale=alt.Scale(
           range=[
            "#A3E6E4",  # Pastel of #36B7B4
            # "#F597A6",  # Pastel of #E6224B
            # "#F9E3A8",  # Pastel of #F4C245
            "#7DB8E0",  # Pastel of #0063AF
            "#80D4B0",  # Pastel of #00A767
            "#89D3F4",  # Pastel of #179FDB
            "#F7A485",  # Pastel of #EB5C2E
        ]
        )),
     tooltip = ['date:T', 'value:Q', 'type:N']

).properties(
     width= 400,
        height=300
)

# shifts line to middle of bars
cont_month_gdp['date'] = cont_month_gdp['date'].dt.strftime('%Y-%m')+"-14"

total = alt.Chart(cont_month_gdp).mark_line(interpolate='monotone', color = '#707070', strokeWidth= 1.5).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title='',
            axis=alt.Axis(labelExpr="format(datum.value,',') + '%'")),
    tooltip = ['date:T', 'value:Q']

)

points_total = alt.Chart(cont_month_gdp).mark_point(filled=True, color='#707070', size=30, opacity=1).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title=''),
    tooltip=['date:Q', 'value:Q']
)

total_text = (
    alt.Chart(pd.DataFrame({
        'date': [most_recent_date],
        'value': [most_recent_value],
        'label': [f"GDP: {most_recent_value}%"]
    }))
    .mark_text(align='right', dx=100, dy=0, color='#707070', size=12)
    .encode(
        x='date:T',
        y='value:Q',
        text='label:N'
    )
)
zero_line = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color='#707070', strokeWidth=1, opacity=0.5).encode(
    y=alt.Y('y:Q')
)

chart100 =  zero_line + chart + total + points_total + total_text + image.encode(x=alt.value(400), y=alt.value(-15), opacity=alt.value(0.5))
chart100.save("charts/contributions_month.json")
chart100.save("charts/contributions_month.png", scale_factor=2.0)
chart100.save("charts/contributions_month.svg")
chart100

In [77]:
# filtering data to monthly data and making sure date is datetime format
yoy_data20 = data2[
data2['Category'].str.strip().str.casefold() == 'Contribution to growth, latest month on same month a year ago'.casefold()]

yoy_data20 = yoy_data20[
    yoy_data20['Time Period'].notna() &  # Ensure 'Time Period' is not NA
    (yoy_data20['Time Period'] != '[Not applicable]')  # Exclude rows with specific unwanted value
]
yoy_data20['Time Period'] = pd.to_datetime(yoy_data20['Time Period'], format='%Y %b')

cont_yoy_data = yoy_data20.iloc[:, [0, 2, 4, 9, 10]]
cont_yoy_data_long = cont_yoy_data.melt(id_vars=['Time Period'], var_name='type', value_name='value')
cont_yoy_data_long = cont_yoy_data_long.rename(columns={'Time Period': 'date'})
cont_yoy_data_long = cont_yoy_data_long.query("date >= '2023-10-01'")
cont_yoy_data_long['type'] = cont_yoy_data_long['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
cont_yoy_data_long['type'] = cont_yoy_data_long['type'].str.strip()
cont_yoy_data_long['type'] = cont_yoy_data_long['type'].replace({
    'Total GVA at basic prices': 'GDP',  # Replace 'Total GVA' with 'GDP'
    'Total service industries': 'Services',
    'Total production industries': 'Production',
    'Construction [note 5]' : 'Construction'
    # Add more replacements as needed
})
# splitting data so we have separate df for gdp which will be line not bars
cont_yoy_gdp = cont_yoy_data_long[cont_yoy_data_long['type'] == 'GDP'].reset_index(drop=True)  # Rows with 'type' == 'GDP'
cont_yoy_data_long = cont_yoy_data_long[cont_yoy_data_long['type'] != 'GDP'].reset_index(drop=True)

most_recent_date = cont_yoy_gdp['date'].max()
most_recent_value = cont_yoy_gdp.loc[cont_yoy_gdp['date'] == most_recent_date, 'value'].iloc[0]

chart = alt.Chart(cont_yoy_data_long).mark_bar(
).encode(
    # x=alt.X('date:T', title='', scale=alt.Scale(padding=16), axis=alt.Axis(format='%b %Y')),
    x = alt.X('yearmonth(date):T',scale=alt.Scale(), axis=alt.Axis(format='%b %Y')),
    y=alt.Y('value:Q', title='',
            axis=alt.Axis(labelExpr="format(datum.value,',') + '%'"),
            #  scale=alt.Scale(domain=[-0.5, 0.5]),
              stack='zero'),
    color=alt.Color('type:N', sort=alt.Sort('ascending'),
                    legend=alt.Legend(orient='top', direction='horizontal', title=''),
                     scale=alt.Scale(
            range=[
            "#A3E6E4",  # Pastel of #36B7B4
            # "#F597A6",  # Pastel of #E6224B
            # "#F9E3A8",  # Pastel of #F4C245
            "#7DB8E0",  # Pastel of #0063AF
            "#80D4B0",  # Pastel of #00A767
            "#89D3F4",  # Pastel of #179FDB
            "#F7A485",  # Pastel of #EB5C2E
        ]
        )),
     tooltip = ['date:T', 'value:Q', 'type:N']

).properties(
     width= 400,
        height=300
)

# shifts line to middle of bars
cont_yoy_gdp['date'] = cont_yoy_gdp['date'].dt.strftime('%Y-%m')+"-14"

total = alt.Chart(cont_yoy_gdp).mark_line(interpolate='monotone', color = '#707070', strokeWidth= 1.5).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title='',
            axis=alt.Axis(labelExpr="format(datum.value,',') + '%'")),
    tooltip = ['date:T', 'value:Q']

)

points_total = alt.Chart(cont_yoy_gdp).mark_point(filled=True, color='#707070', size=30, opacity=1).encode(
    x=alt.X('date:T', title=''),
    y=alt.Y('value:Q', title=''),
    tooltip=['date:Q', 'value:Q']
)

total_text = (
    alt.Chart(pd.DataFrame({
        'date': [most_recent_date],
        'value': [most_recent_value],
        'label': [f"GDP: {most_recent_value}%"]
    }))
    .mark_text(align='right', dx=90, dy=0, color='#707070', size=12)
    .encode(
        x='date:T',
        y='value:Q',
        text='label:N'
    )
)
zero_line = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color='#707070', strokeWidth=1, opacity=0.5).encode(
    y=alt.Y('y:Q')
)

chart200 =  zero_line + chart + total + points_total + total_text +  image.encode(x=alt.value(400), y=alt.value(-15), opacity=alt.value(0.5))
chart200.save("charts/contributions_yoy.json")
chart200.save("charts/contributions_yoy.png", scale_factor=2.0)
chart200.save("charts/contributions_yoy.svg")
chart200

In [78]:
# filtering data to monthly data and making sure date is datetime format
monthly_data20 = data2[
data2['Category'].str.strip().str.casefold() == 'contributon to growth, latest month on previous month'.casefold()]

monthly_data20 = monthly_data20[
    monthly_data20['Time Period'].notna() &  # Ensure 'Time Period' is not NA
    (monthly_data20['Time Period'] != '[Not applicable]')  # Exclude rows with specific unwanted value
]
monthly_data20['Time Period'] = pd.to_datetime(monthly_data20['Time Period'], format='%Y %b')

# cont_month_data2 = monthly_data20.iloc[:, [0, 2, 4, 9, 10]]

cont_month_data2 = monthly_data20.iloc[:, ~monthly_data20.columns.isin(monthly_data20.columns[[1, 2, 4, 10]])]
cont_month_data2 = cont_month_data2.loc[[cont_month_data2['Time Period'].idxmax()]]

cont_month_data_long2 = cont_month_data2.melt(id_vars=['Time Period'], var_name='type', value_name='value')
cont_month_data_long2 = cont_month_data_long2.rename(columns={'Time Period': 'date'})
cont_month_data_long2['type'] = cont_month_data_long2['type'].str.strip()
cont_month_data_long2['type'] = cont_month_data_long2['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
cont_month_data_long2

cont_month_data_long2['type'] = cont_month_data_long2['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
cont_month_data_long2['zero_dummy'] = 0

cont_month_data_long2 = cont_month_data_long2.sort_values('value', ascending=True)  # Sort the data by the value column
# Initialize 'highlight' column to False
cont_month_data_long2['highlight'] = False


# Find the maximum value(s)
max_value = cont_month_data_long2['value'].max()
max_types = cont_month_data_long2[cont_month_data_long2['value'] == max_value]['type'].tolist()

# Highlight the type(s) with the maximum value
cont_month_data_long2['highlight'] = cont_month_data_long2['type'].isin(max_types)
cont_month_data_long2['value'] = cont_month_data_long2['value'].astype(float)/100

chart1000 = alt.Chart(cont_month_data_long2).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
               labelBaseline= 'middle',
               labels=False,
                domain=False,
                labelLimit=9999999)
    ),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.0019),
            axis=alt.Axis(
                labelLimit=200,  format='%',  offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart1000.mark_bar()
labels = chart1000.mark_text(
    opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.2%')")
)

type_labels = chart1000.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart1000 = bars + labels + image.encode(x=alt.value(320), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart1000 = chart1000.properties(
    width=300,
    height=400
).configure_bar(
)

chart1000.save("charts/contributions_industry_month.json")
chart1000.save("charts/contributions_industry_month.png", scale_factor=2.0)
chart1000.save("charts/contributions_industry_month.svg")

chart1000

In [79]:
# filtering data to monthly data and making sure date is datetime format
monthly_data20 = data2[
data2['Category'].str.strip().str.casefold() == 'contributon to growth, latest month on previous month'.casefold()]

monthly_data20 = monthly_data20[
    monthly_data20['Time Period'].notna() &  # Ensure 'Time Period' is not NA
    (monthly_data20['Time Period'] != '[Not applicable]')  # Exclude rows with specific unwanted value
]
monthly_data20['Time Period'] = pd.to_datetime(monthly_data20['Time Period'], format='%Y %b')

# cont_month_data2 = monthly_data20.iloc[:, [0, 2, 4, 9, 10]]

cont_month_data2 = monthly_data20.iloc[:, ~monthly_data20.columns.isin(monthly_data20.columns[[1, 2, 4, 10]])]
cont_month_data2 = cont_month_data2.loc[[cont_month_data2['Time Period'].idxmax()]]

cont_month_data_long2 = cont_month_data2.melt(id_vars=['Time Period'], var_name='type', value_name='value')
cont_month_data_long2 = cont_month_data_long2.rename(columns={'Time Period': 'date'})
cont_month_data_long2['type'] = cont_month_data_long2['type'].str.strip()
cont_month_data_long2['type'] = cont_month_data_long2['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
cont_month_data_long2

cont_month_data_long2['type'] = cont_month_data_long2['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
cont_month_data_long2['zero_dummy'] = 0

cont_month_data_long2 = cont_month_data_long2.sort_values('value', ascending=True)  # Sort the data by the value column
# Initialize 'highlight' column to False
cont_month_data_long2['highlight'] = False


# Find the maximum value(s)
min_value = cont_month_data_long2['value'].min()
min_types = cont_month_data_long2[cont_month_data_long2['value'] == min_value]['type'].tolist()

# Highlight the type(s) with the maximum value
cont_month_data_long2['highlight'] = cont_month_data_long2['type'].isin(min_types)
cont_month_data_long2['value'] = cont_month_data_long2['value'].astype(float)/100

chart1500 = alt.Chart(cont_month_data_long2).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
               labelBaseline= 'middle',
               labels=False,
                domain=False,
                labelLimit=9999999)
    ),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.0019),
            axis=alt.Axis(
                labelLimit=200,  format='%',  offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart1500.mark_bar()
labels = chart1500.mark_text(
    opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.2%')")
)

type_labels = chart1500.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart1500 = bars + labels + image.encode(x=alt.value(320), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart1500 = chart1500.properties(
    width=300,
    height=400
).configure_bar(
)

chart1500.save("charts/contributions_industry_month_min_highlighted.json")
chart1500.save("charts/contributions_industry_month_min_highlighted.png", scale_factor=2.0)
chart1500.save("charts/contributions_industry_month_min_highlighted.svg")

chart1500

In [80]:
cont_yoy_data2 = yoy_data20.iloc[:, ~yoy_data20.columns.isin(yoy_data20.columns[[1, 2, 4, 10]])]

cont_yoy_data2 = cont_yoy_data2.loc[[cont_yoy_data2['Time Period'].idxmax()]]

cont_yoy_data_long2 = cont_yoy_data2.melt(id_vars=['Time Period'], var_name='type', value_name='value')
cont_yoy_data_long2 = cont_yoy_data_long2.rename(columns={'Time Period': 'date'})
cont_yoy_data_long2['type'] = cont_yoy_data_long2['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
cont_yoy_data_long2

cont_yoy_data_long2['type'] = cont_yoy_data_long2['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
cont_yoy_data_long2['zero_dummy'] = 0

cont_yoy_data_long2 = cont_yoy_data_long2.sort_values('value', ascending=True)  # Sort the data by the value column
# Initialize 'highlight' column to False
cont_month_data_long2['highlight'] = False

# Find the maximum value(s)
max_value = cont_yoy_data_long2['value'].max()
max_types = cont_yoy_data_long2[cont_yoy_data_long2['value'] == max_value]['type'].tolist()

# Highlight the type(s) with the maximum value
cont_yoy_data_long2['highlight'] = cont_yoy_data_long2['type'].isin(max_types)
cont_yoy_data_long2['value'] = cont_yoy_data_long2['value'].astype(float)/100


chart2000 = alt.Chart(cont_yoy_data_long2).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
               labelBaseline= 'middle',
               labels=False,
                domain=False,
                labelLimit=9999999)
    ),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.0019),
            axis=alt.Axis(
                labelLimit=200,  format='%',  offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart2000.mark_bar()
labels = chart2000.mark_text(
    opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.2%')")
)

type_labels = chart2000.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart2000 = bars + labels + image.encode(x=alt.value(260), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart2000 = chart2000.properties(
    width=300,
    height=400
).configure_bar(
)

chart2000.save("charts/contributions_industry_yoy.json")
chart2000.save("charts/contributions_industry_yoy.png", scale_factor=2.0)
chart2000.save("charts/contributions_industry_yoy.svg")

chart2000

In [81]:
cont_yoy_data2 = yoy_data20.iloc[:, ~yoy_data20.columns.isin(yoy_data20.columns[[1, 2, 4, 10]])]

cont_yoy_data2 = cont_yoy_data2.loc[[cont_yoy_data2['Time Period'].idxmax()]]

cont_yoy_data_long2 = cont_yoy_data2.melt(id_vars=['Time Period'], var_name='type', value_name='value')
cont_yoy_data_long2 = cont_yoy_data_long2.rename(columns={'Time Period': 'date'})
cont_yoy_data_long2['type'] = cont_yoy_data_long2['type'].str.replace(r'\s*\([^)]+\)', '', regex=True)
cont_yoy_data_long2

cont_yoy_data_long2['type'] = cont_yoy_data_long2['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
cont_yoy_data_long2['zero_dummy'] = 0

cont_yoy_data_long2 = cont_yoy_data_long2.sort_values('value', ascending=True)  # Sort the data by the value column
# Initialize 'highlight' column to False
cont_month_data_long2['highlight'] = False

# Find the maximum value(s)
min_value = cont_yoy_data_long2['value'].min()
min_types = cont_yoy_data_long2[cont_yoy_data_long2['value'] == min_value]['type'].tolist()

# Highlight the type(s) with the maximum value
cont_yoy_data_long2['highlight'] = cont_yoy_data_long2['type'].isin(min_types)
cont_yoy_data_long2['value'] = cont_yoy_data_long2['value'].astype(float)/100


chart2500 = alt.Chart(cont_yoy_data_long2).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
               labelBaseline= 'middle',
               labels=False,
                domain=False,
                labelLimit=9999999)
    ),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.0019),
            axis=alt.Axis(
                labelLimit=200,  format='%',  offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart2500.mark_bar()
labels = chart2500.mark_text(
    opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.2%')")
)

type_labels = chart2500.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart2500 = bars + labels + image.encode(x=alt.value(260), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart2500 = chart2500.properties(
    width=300,
    height=400
).configure_bar(
)

chart2500.save("charts/contributions_industry_yoy_min_highlighted.json")
chart2500.save("charts/contributions_industry_yoy_min_highlighted.png", scale_factor=2.0)
chart2500.save("charts/contributions_industry_yoy_min_highlighted.svg")

chart2500

In [82]:
data3['Time period'] = pd.to_datetime(data3['Time period'], format='%b %Y', errors='coerce')

# Find the most recent date
most_recent_date = data3['Time period'].max()

# Filter the DataFrame to include only the most recent date
construction_month = data3[data3['Time period'] == most_recent_date]

construction_month_long = construction_month.melt(id_vars=['Time period'], var_name='type', value_name='value')
construction_month_long['value'] = construction_month_long['value'].astype(float)/100

construction_month_long['type'] = construction_month_long['type'].str.strip()
construction_month_long['highlight'] = False
construction_month_long.loc[
    construction_month_long['type'] == 'All work', 'highlight'
] = True

# construction_month_long['type'] = construction_month_long['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
construction_month_long['zero_dummy'] = 0

construction_month_long = construction_month_long.sort_values('value', ascending=True)  # Sort the data by the value column
# Initialize 'highlight' column to False

chart3000 = alt.Chart(construction_month_long).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
               labelBaseline= 'middle',
               labels=False,
                domain=False,
                labelLimit=9999999)
    ),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.04),
            axis=alt.Axis(
                labelLimit=200,  format='%',  offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart3000.mark_bar()
labels = chart3000.mark_text(
    opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.2%')")
)

type_labels = chart3000.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart3000 = bars + labels + image.encode(x=alt.value(280), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart3000 = chart3000.properties(
    width=300,
    height=400
).configure_bar(
)

chart3000.save("charts/construction_month.json")
chart3000.save("charts/construction_month.png", scale_factor=2.0)
chart3000.save("charts/construction_month.svg")

chart3000

In [83]:
data4['Time period'] = pd.to_datetime(data4['Time period'], format='%b %Y', errors='coerce')

# Find the most recent date
most_recent_date = data4['Time period'].max()

# Filter the DataFrame to include only the most recent date
construction_yoy = data4[data4['Time period'] == most_recent_date]

construction_yoy_long = construction_yoy.melt(id_vars=['Time period'], var_name='type', value_name='value')
construction_yoy_long['value'] = construction_yoy_long['value'].astype(float)/100

construction_yoy_long['type'] = construction_yoy_long['type'].str.strip()
construction_yoy_long['highlight'] = False
construction_yoy_long.loc[
    construction_yoy_long['type'] == 'All work', 'highlight'
] = True

# construction_yoy_long['type'] = construction_yoy_long['type'].map(lambda x: split_text(x, 60)) # Use the split_text function to split the long division names
construction_yoy_long['zero_dummy'] = 0

construction_yoy_long = construction_yoy_long.sort_values('value', ascending=True)  # Sort the data by the value column
# Initialize 'highlight' column to False
construction_yoy_long['highlight'] = False
construction_yoy_long.loc[
    construction_yoy_long['type'] == 'All work', 'highlight'
] = True

chart4000 = alt.Chart(construction_yoy_long).encode(
    y=alt.Y('type:N',
            title='',
            sort='-x',
            axis=alt.Axis(
               labelBaseline= 'middle',
               labels=False,
                domain=False,
                labelLimit=9999999)
    ),

    x=alt.X('value:Q', title='',
            # scale=alt.Scale(domainMin=-0.06),
            axis=alt.Axis(
                labelLimit=200,  format='%',  offset=5),
        ),
        color=alt.Color('highlight:N', scale=alt.Scale(domain=[False, True], range=['#707070', '#36B7B4']), legend=None)
)

bars = chart4000.mark_bar()
labels = chart4000.mark_text(
    opacity=0.7,
    align=alt.expr("if(datum.value < 0, 'right', 'left')"),
    baseline='middle',
    dx=alt.expr("datum.value < 0 ? -5 : 5"),
    color='black',
    fontSize=10,
    text=alt.expr("format(datum.value, '.2%')")
)

type_labels = chart4000.mark_text(
    align=alt.expr("datum.value < 0 ? 'left' : 'right'"),
    dx=alt.expr("datum.value < 0 ? 5 : -5"),
    opacity=0.7,
).encode(
    text=alt.Text('type:N'),
    x=alt.X('zero_dummy:Q'),
)

chart4000 = bars + labels + image.encode(x=alt.value(280), y=alt.value(-15), opacity=alt.value(0.5))  + type_labels

chart4000 = chart4000.properties(
    width=300,
    height=400
).configure_bar(
)

chart4000.save("charts/construction_yoy.json")
chart4000.save("charts/construction_yoy.png", scale_factor=2.0)
chart4000.save("charts/construction_yoy.svg")

chart4000