# OECD data

Note: this notebook stopped working in January 2024, when the OECD replaced the API it relies on with a new one.

## Python setup

### Imports

In [1]:
# system imports
from pathlib import Path
from typing import Sequence, TypeVar, cast

# analytic imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.lines import Line2D
from pandas import DataFrame, Series

# local imports
from henderson import hma
from plotting import finalise_plot, set_chart_dir
from utility import annualise_percentages, percent_change, qtly_to_monthly

### Settings

In [2]:
# pandas: raise the display limits so wide/long tables print in full
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_colwidth", 999)

# matplotlib look-and-feel
plt.style.use("fivethirtyeight")

# window used for the "recent" view in multi-time-period charts
RECENCY_PERIOD = 6  # years
MONTH_ADJ = 3  # months
TODAY = pd.Timestamp("today")
RECENT = TODAY - pd.DateOffset(months=MONTH_ADJ, years=RECENCY_PERIOD)

# shared constants
SOURCE = "Source: OECD"
_DataT = TypeVar("_DataT", Series, DataFrame)  # constrained to Series or DataFrame

# chart output directory: create it if needed, then tell the plotting module
CHART_DIR = "./CHARTS/OECD/"
Path(CHART_DIR).mkdir(parents=True, exist_ok=True)
set_chart_dir(CHART_DIR)

# start clean: delete the PNG files left behind by earlier runs
for filename in Path(CHART_DIR).glob("*.png"):
    filename.unlink()

# value passed as show= to finalise_plot() for every chart
SHOW = False

## OECD support

### Data

In [3]:
# Three-letter location codes, as they appear in the LOCATION column of the
# OECD downloads, mapped to the display names used in charts and printouts.
# The keys, in this order, also form the location list sent in each query.
location_map = {
    "AUS": "Australia",
    "AUT": "Austria",
    "BEL": "Belgium",
    "CAN": "Canada",
    "CHL": "Chile",
    "CZE": "Czech Rep.",
    "DNK": "Denmark",
    "EST": "Estonia",
    "FIN": "Finland",
    "FRA": "France",
    "DEU": "Germany",
    "GRC": "Greece",
    "HUN": "Hungary",
    "ISL": "Iceland",
    "IRL": "Ireland",
    "ISR": "Israel",
    "ITA": "Italy",
    "JPN": "Japan",
    "KOR": "Korea",
    "LVA": "Latvia",
    "LUX": "Luxembourg",
    "MEX": "Mexico",
    "NLD": "Netherlands",
    "NZL": "New Zealand",
    "NOR": "Norway",
    "POL": "Poland",
    "PRT": "Portugal",
    "SVK": "Slovak Rep.",
    "SVN": "Slovenia",
    "ESP": "Spain",
    "SWE": "Sweden",
    "CHE": "Switzerland",
    "TUR": "Turkey",
    "GBR": "United Kingdom",
    "USA": "United States",
    "ARG": "Argentina",
    "BRA": "Brazil",
    "CHN": "China",
    "COL": "Colombia",
    "CRI": "Costa Rica",
    "IND": "India",
    "IDN": "Indonesia",
    "LTU": "Lithuania",
    "RUS": "Russia",
    "SAU": "Saudi Arabia",
    "ZAF": "South Africa",
}

In [4]:
def get_chart_groups() -> dict[str, list[str]]:
    """Return the named groups of location codes that are charted together.

    Every group holds at most six codes, so that no chart carries
    more than six lines."""

    return {
        "of_interest": ["AUS", "USA", "CAN", "DEU", "GBR", "JPN"],
        "anglosphere": ["AUS", "USA", "CAN", "NZL", "GBR", "IRL"],
        "major_europe": ["FRA", "DEU", "ITA", "GBR", "RUS", "ESP"],
        "largest_economies": ["USA", "CHN", "JPN", "DEU", "GBR", "IND"],
        "asia": ["KOR", "JPN", "CHN", "IND", "IDN"],
        "north_europe": ["DNK", "SWE", "NOR", "ISL", "FIN", "GBR"],
        "baltic_europe": ["LVA", "LTU", "EST"],
        "central_europe": ["CZE", "HUN", "SVK", "SVN", "POL", "GRC"],
        "west_europe": ["BEL", "ESP", "PRT", "NLD", "LUX", "FRA"],
        "italo_germanic_europe": ["DEU", "AUT", "CHE", "ITA"],
        "n_america": ["USA", "CAN", "MEX"],
        "c_s_america": ["CHL", "ARG", "BRA", "COL", "CRI"],
        "other": ["AUS", "NZL", "SAU", "ZAF", "ISR"],
    }


# group name -> location codes; iterated by the plotting functions
chart_sets = get_chart_groups()

### Python functions

In [5]:
def get_from_oecd(sdmx_query: str, start: str | None = None) -> DataFrame:
    """Get data table from the OECD using an SDMX query, where SDMX
    stands for the Statistical Data and Metadata eXchange API.

    sdmx_query is the "resource/key" text placed in the request URL
    (the form returned by build_query()). When start is given it is
    sent as the startTime parameter. The CSV reply is returned as
    parsed by pandas.read_csv()."""

    url = f"https://stats.oecd.org/SDMX-JSON/data/{sdmx_query}?contentType=csv"
    if start is not None:
        url += f"&startTime={start}"
    return pd.read_csv(url)

In [6]:
def build_query(
    resource: str,
    locations: Sequence[str],
    subject: Sequence[str],
    measure: Sequence[str],
    frequency: None | Sequence[str] = None,
) -> str:
    """Build an SDMC query key."""

    if frequency is not None:
        key = [locations, subject, measure, frequency]
    else:
        key = [locations, subject, measure]
    key_string = f"{'.'.join(['+'.join(x) for x in key])}"
    return f"{resource}/{key_string}"

## OECD Unemployment Rates

### Get UE data

In [7]:
def get_uer_data() -> tuple[DataFrame, str]:
    """Capture UE data from OECD data lake.

    Monthly data is requested first. Locations absent from the monthly
    reply are then requested quarterly and passed through
    qtly_to_monthly() before being added to the table.

    Returns a DataFrame (one column per location code) and a string
    description of the data source."""

    # data identifiers - seasonally adjusted monthly unemployment rates
    resource_id = "STLABOUR"  # Short-term labour
    subject = "LRHUTTTT"  # Monthly unemployment rate all persons, sa
    measure = "STSA"
    rfooter = f"Source: OECD {resource_id} {subject} {measure}"

    # capture monthly, then quarterly if no monthly data
    uer_loc_map = tuple(location_map.keys())
    uer = DataFrame()
    for period in ("M", "Q"):  # Monthly must be first
        if not uer_loc_map:
            # Nothing left to fetch. An empty location list would leave
            # the first key dimension blank, which SDMX key syntax treats
            # as a wildcard; the reply could then overwrite the monthly
            # columns already captured.
            break

        query = build_query(resource_id, uer_loc_map, [subject], [measure], [period])
        data = get_from_oecd(query, start="2017").pivot(
            values="Value", index="TIME", columns="LOCATION"
        )
        data.index = pd.PeriodIndex(data.index, freq=period)
        data = data.sort_index()

        if period == "Q":
            data = qtly_to_monthly(data)

        uer[data.columns] = data[data.columns]

        # set up second round: only locations that returned no data
        uer_loc_map = tuple(set(uer_loc_map) - set(data.columns))

    return uer, rfooter


uer_data, uer_rfooter = get_uer_data()
uer_data.tail()

Unnamed: 0_level_0,AUS,AUT,BEL,CAN,CHL,COL,CRI,CZE,DEU,DNK,ESP,EST,FIN,FRA,GBR,GRC,HUN,IRL,ISL,ISR,ITA,JPN,KOR,LTU,LUX,LVA,MEX,NLD,NOR,POL,PRT,SVK,SVN,SWE,TUR,USA,CHE,NZL
TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
2023-09,3.556136,5.5,5.4,5.5,8.847384,9.773538,7.69806,2.7,3.1,4.7,11.9,6.5,7.4,7.3,4.2,10.2,4.0,4.7,4.2,3.177481,7.5,2.6,2.6,6.9,5.5,6.5,2.694563,3.7,3.5,2.8,6.6,5.9,3.9,8.1,9.1,3.8,4.124191,3.9
2023-10,3.751259,5.1,5.6,5.7,9.031093,10.24258,7.155119,2.9,3.1,5.3,11.9,6.2,7.5,7.3,4.2,9.3,4.1,4.8,3.3,3.008439,7.6,2.5,2.5,6.5,5.5,6.5,2.668419,3.6,3.7,2.8,6.7,5.8,4.1,7.9,8.6,3.8,,
2023-11,3.870297,4.9,5.6,5.8,,10.33804,7.288546,2.5,3.1,4.9,11.8,6.3,7.6,7.3,,9.4,4.1,4.8,3.2,2.824852,7.4,2.5,2.8,6.5,5.7,6.6,2.809212,3.5,3.8,2.8,6.6,5.8,4.2,7.9,9.0,3.7,,
2023-12,3.882047,5.6,5.7,5.8,,10.81902,,2.8,3.1,5.0,11.7,6.3,7.5,7.3,,9.2,4.2,4.9,3.9,,7.2,2.4,3.3,6.3,5.5,6.6,2.832966,3.6,3.5,2.7,6.6,5.8,4.2,8.2,,3.7,,
2024-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.7,,


### Plot unemployment rates

In [8]:
def plot_uer() -> None:
    """Plot national unemployment rates, one chart per chart group.

    Group members without a column in uer_data are left off the chart,
    and a group with no members left is skipped. The left footer lists
    each charted location's latest available rate, rounded to one
    decimal place."""

    for name, nation_list in chart_sets.items():
        chart_n = [x for x in nation_list if x in uer_data.columns]
        if not chart_n:
            continue  # nothing to draw - plotting an empty frame raises

        # latest non-missing observation for each charted location
        endpoints = [f"{x}={uer_data[x].dropna().round(1).iloc[-1]}" for x in chart_n]
        lfooter_ur = f"Latest: {', '.join(endpoints)}"

        ax = (
            uer_data[chart_n]
            .rename(columns=location_map)
            .sort_index(axis=1)
            .plot(lw=2.5)
        )
        finalise_plot(
            ax,
            title=f"Unemployment rates - {name.replace('_', ' ').title()}",
            ylabel="Per cent",
            rfooter=uer_rfooter,
            lfooter=lfooter_ur,
            show=SHOW,
        )


plot_uer()

In [9]:
# set a threshold for establishing an OECD mean
# (the printout counts the locations with a non-missing rate in each of
# the latest periods, which informs the choice of threshold)
print("Data available: \n", uer_data.notna().sum(axis=1).tail())
# NOTE(review): not referenced by any later cell in the visible notebook
MIN_UER_REQUIREMENT = 33

Data available: 
 TIME
2023-09    38
2023-10    36
2023-11    34
2023-12    31
2024-01     1
Freq: M, dtype: int64


### Australian UER vs OECD Average

In [10]:
# TODO: chart the Australian unemployment rate against the OECD average

## OECD GDP data

### Real GDP growth

In [11]:
def get_rgdp() -> tuple[DataFrame, DataFrame, str, str]:
    """Get real GDP data from the OECD.

    Returns, in order: the quarterly GDP table (one column per location
    code), the result of percent_change() over one period on that table,
    and the right and left footer texts used on the GDP charts."""

    # get the real GDP data
    resource_id = "QNA"  # Quarterly National Accounts
    # VPVOBARSA: US dollars, volume estimates, fixed PPPs, OECD reference
    # year, annual levels, seasonally adjusted
    measure = "VPVOBARSA"
    subject = "B1_GE"  # Gross domestic product - expenditure approach
    rfooter = f"Source: OECD {resource_id} {subject} {measure}"
    lfooter = "Volume est., US$, fixed PPPs, Seas Adj"
    locations = tuple(location_map.keys())

    query = build_query(resource_id, locations, [subject], [measure], ["Q"])
    gdp = get_from_oecd(query, start="1959").pivot(
        values="Value", index="TIME", columns="LOCATION"
    )
    gdp.index = pd.PeriodIndex(gdp.index, freq="Q")
    gdp_qtr_growth = DataFrame(percent_change(gdp, 1))  # mypy type cast

    return gdp, gdp_qtr_growth, rfooter, lfooter


rgdp, rgdp_qtr_growth, rgdp_rfooter, rgdp_lfooter = get_rgdp()
rgdp_qtr_growth.tail()

LOCATION,ARG,AUS,AUT,BEL,BRA,CAN,CHE,CHL,COL,CRI,CZE,DEU,DNK,ESP,EST,FIN,FRA,GBR,GRC,HUN,IDN,IND,IRL,ISL,ISR,ITA,JPN,KOR,LTU,LUX,LVA,MEX,NLD,NOR,NZL,POL,PRT,RUS,SAU,SVK,SVN,SWE,TUR,USA,ZAF
TIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1
2022Q4,-1.691144,0.854938,-0.109462,0.242595,-0.116739,-0.215344,-0.02667,0.134975,-1.636138,2.252248,-0.355483,-0.406273,0.129019,0.491225,-1.436316,-0.54647,-0.049266,0.103799,0.914758,-0.836048,2.336996,1.140989,-1.578874,2.697422,1.071167,-0.244623,0.248074,-0.302966,-0.451993,-2.315748,0.639919,0.677672,0.686298,-0.669131,-0.990936,-2.276954,0.508477,,1.164422,0.234247,0.608573,-1.255339,1.148543,0.635422,-1.094061
2023Q1,0.809382,0.533363,0.091308,0.413038,1.439727,0.610026,0.309544,0.546971,2.171743,-0.584883,0.113507,0.104301,0.906027,0.504719,-0.777552,0.029095,0.046662,0.253156,0.050324,-0.245232,0.837337,2.080532,-1.878232,1.257146,0.753997,0.552947,1.227394,0.329872,-1.855364,0.852667,-0.471083,0.57031,-0.494665,0.214569,-0.645515,1.101975,1.46096,,-1.116415,0.197028,0.157451,0.656898,-0.2521,0.55638,0.406613
2023Q2,-2.727763,0.438746,-1.109172,0.332988,0.984869,0.338964,-0.127102,-0.348748,-0.988242,2.504626,0.080151,0.02778,-0.699461,0.503504,-0.600516,0.345665,0.658527,0.037754,1.102526,-0.029898,0.86641,1.887501,-0.364485,1.599376,0.787809,-0.345907,0.880415,0.608655,2.447922,-0.185081,-0.303618,0.934632,-0.444704,-0.513446,0.862009,0.295513,0.129864,,-0.505201,0.437179,0.993703,-0.629139,3.306437,0.511122,0.457336
2023Q3,2.723691,0.211975,-0.518949,0.381114,0.144538,-0.26663,0.270638,0.331249,0.238777,1.286148,-0.631621,-0.009259,-0.652073,0.394095,-1.293734,-0.856059,-0.029561,-0.128844,0.015572,0.855188,0.809229,1.736145,-1.911116,-3.753676,0.659547,0.103645,-0.72869,0.616075,0.041692,-0.143515,0.174198,0.746187,-0.307952,-0.523183,-0.711937,1.488772,-0.211216,,-3.176127,0.246495,-0.157113,-0.094535,0.272529,1.193871,-0.247536
2023Q4,,,0.230326,0.420255,,0.3,,,,1.781358,0.232705,-0.287058,,0.623115,-0.36545,,-0.018618,,,,,,-0.724855,,,0.186565,,0.630929,-0.291726,,0.429806,0.100001,,,,,0.811385,,0.400002,,,0.094445,,0.809966,


In [12]:
# check for missing data in the final period ...
def check_rgdp(df: DataFrame) -> None:
    """Report on gaps in the last row of the rGDP table from the OECD.

    Prints nothing when the final period is complete. Otherwise prints
    the final period, the number of columns with no value in it, and
    the column labels without and with a value."""

    latest = df.iloc[-1]
    is_missing = latest.isna()
    n_missing = is_missing.sum()
    if not n_missing:
        return

    absent = df.columns[is_missing].to_list()
    present = df.columns[~is_missing].to_list()
    print(f"Final period: {latest.name}")
    print(f"Missing data count for final period: {n_missing}")
    print(f"Missing data belongs to: {absent}")
    print(f"Nations with final data: {present}")


# report which nations have no GDP figure yet for the latest quarter
check_rgdp(rgdp)

Final period: 2023Q4
Missing data count for final period: 26
Missing data belongs to: ['ARG', 'AUS', 'BRA', 'CHE', 'CHL', 'COL', 'DNK', 'FIN', 'GBR', 'GRC', 'HUN', 'IDN', 'IND', 'ISL', 'ISR', 'JPN', 'LUX', 'NLD', 'NOR', 'NZL', 'POL', 'RUS', 'SVK', 'SVN', 'TUR', 'ZAF']
Nations with final data: ['AUT', 'BEL', 'CAN', 'CRI', 'CZE', 'DEU', 'ESP', 'EST', 'FRA', 'IRL', 'ITA', 'KOR', 'LTU', 'LVA', 'MEX', 'PRT', 'SAU', 'SWE', 'USA']


In [13]:
def plot_contractions():
    """Chart, quarter by quarter from 1999Q1, how many nations had
    negative quarterly GDP growth; then print the count and the names
    for the latest quarter in the table."""

    title = "Num. OECD Monitored States with Qrtly GDP contraction"
    first_quarter = pd.Period("1999Q1", freq="Q")

    shrinking = rgdp_qtr_growth < 0  # missing growth compares as False
    tally = shrinking.sum(axis=1)

    ax = tally[tally.index >= first_quarter].plot.bar()
    ax.set_xticks(ax.get_xticks()[::4])  # keep every fourth tick only
    finalise_plot(
        ax,
        title=title,
        ylabel="Count",
        rfooter=rgdp_rfooter,
        lfooter=rgdp_lfooter,
        show=SHOW,
    )

    # name the nations contracting in the latest quarter
    print(f"Latest N={tally.iloc[-1]}")
    latest = shrinking.iloc[-1]
    names = [location_map[code] for code in latest[latest].index]
    print(", ".join(names))


plot_contractions()

Latest N=5
Germany, Estonia, France, Ireland, Lithuania


In [14]:
def plot_recessions():
    """Chart, quarter by quarter from 1999Q1, how many nations were in
    a technical recession (negative GDP growth in the quarter and in
    the one before it); then print the count and the names for the
    latest quarter in the table."""

    title = "Number of OECD Monitored States in Tech. Recession"
    first_quarter = pd.Period("1999Q1", freq="Q")

    falling_now = rgdp_qtr_growth < 0
    falling_before = rgdp_qtr_growth.shift(1) < 0
    in_recession = falling_now & falling_before
    tally = in_recession.sum(axis=1)

    ax = tally[tally.index >= first_quarter].plot.bar()
    ax.set_xticks(ax.get_xticks()[::4])  # keep every fourth tick only
    finalise_plot(
        ax,
        title=title,
        ylabel="Count",
        rfooter=rgdp_rfooter,
        lfooter="Recession defined as two quarters of negative GDP growth",
        show=SHOW,
    )

    # name the nations in recession in the latest quarter
    print(f"Latest N={tally.iloc[-1]}")
    latest = in_recession.iloc[-1]
    names = [location_map[code] for code in latest[latest].index]
    print(", ".join(names))


plot_recessions()

Latest N=4
Germany, Estonia, France, Ireland


## OECD inflation data

In [15]:
# some useful constants
HENDERSON = 13  # number of terms passed to hma() for the smoothed series
START_COVID = pd.Period("2017-01-01", freq="M")  # first month shown on the inflation trajectory charts
HIGH_COVID = pd.Period("2021-01-01", freq="M")  # NOTE(review): not referenced in the visible notebook
EXCLUDE = ["Turkey", "Russia"]  # nation names (not codes) left out of summaries and means
# Turkey has rampant inflation
# Russia not updating data during war

In [16]:
def get_cpi(measure: str = "GY") -> tuple[DataFrame, str]:
    """Get CPI data from OECD.
    Measure is either "GY" for annual growth, or
    "IXOB" for the raw index.

    Monthly data is requested first, under the new subject code and
    then the old one. Locations still without data are then requested
    quarterly and passed through qtly_to_monthly().

    Returns data (columns renamed to nation names) and footer-text."""

    # data reference
    resource_id = "PRICES_CPI"
    subject_tuple = ("CP18ALTT", "CPALTT01")  # New then Old standard
    rfooter = f"Source: OECD {resource_id} {subject_tuple} {measure}"

    # locations still to be captured - initially every known location
    cpi_locs = tuple(location_map.keys())

    # capture monthly, then quarterly if no monthly data
    cpi = DataFrame()
    for period in ("M", "Q"):  # Monthly must be first
        for subject in subject_tuple:
            if not cpi_locs:
                # Everything is captured. An empty location list would
                # leave the first key dimension blank, which SDMX key
                # syntax treats as a wildcard; the reply could then
                # overwrite columns captured in an earlier round.
                break

            query = build_query(resource_id, cpi_locs, [subject], [measure], [period])
            data = get_from_oecd(query, start="2000")
            data = data.pivot(values="Value", index="TIME", columns="LOCATION")
            data.index = pd.PeriodIndex(data.index, freq=period)
            data = data.sort_index()

            if period == "Q":
                data = qtly_to_monthly(data)
                # line the converted data up with the rows captured so far
                data = data.reindex(cpi.index, fill_value=np.nan)
            cpi[data.columns] = data[data.columns]

            # set up next round: only locations that returned no data
            cpi_locs = tuple(set(cpi_locs) - set(data.columns))

    cpi.columns = cpi.columns.map(location_map)
    return cpi, rfooter

In [17]:
# Get/create key CPI growth data
def get_growth() -> tuple[dict[str, DataFrame], str]:
    """Derive the CPI growth tables from the CPI index.

    Returns a dictionary of tables keyed "annual", "quarterly",
    "q_annualised", "annual_smooth" and "qa_smooth", together with
    the footer text supplied by get_cpi()."""

    index_levels, rfooter = get_cpi(measure="IXOB")  # index - not seasonally adjusted

    # growth rates over 12 and 3 periods of the monthly table
    annual = DataFrame(percent_change(index_levels, 12))
    quarterly = DataFrame(percent_change(index_levels, 3))
    q_annualised = DataFrame(annualise_percentages(quarterly, 4))

    # smoothed / trend versions, nation by nation, with missing values
    # dropped before the Henderson moving average is applied
    trend_annual, trend_qa = {}, {}
    for nation in index_levels.columns:
        trend_annual[nation] = hma(annual[nation].dropna(), HENDERSON)
        trend_qa[nation] = hma(q_annualised[nation].dropna(), HENDERSON)

    growth = {
        "annual": annual,
        "quarterly": quarterly,
        "q_annualised": q_annualised,
        "annual_smooth": DataFrame(trend_annual),
        "qa_smooth": DataFrame(trend_qa),
    }
    return growth, rfooter


cpi_growth, cpi_rfooter = get_growth()

### CPI over the past year

In [18]:
# chart an inflation rate target
LOWER_IR, UPPER_IR = 2, 3
TARGET = {
    "ymin": LOWER_IR,
    "ymax": UPPER_IR,
    "color": "#dddddd",
    "label": f"{LOWER_IR}-{UPPER_IR}% inflation target",
    "zorder": -1,
}
TARGET_V = TARGET | {"xmin": LOWER_IR, "xmax": UPPER_IR}
for remove in ("ymin", "ymax"):
    TARGET_V.pop(remove)

In [19]:
def get_recent_ohlc(data: DataFrame) -> DataFrame:
    """For a dataset, build a table of Open, High, Low, Close
    points for the last valid 13 months in each column.

    Columns named in EXCLUDE, and columns with no valid observation
    at all, are left out. The result has one row per remaining column,
    labelled "<name> <yy>-<mm>" after that column's latest valid
    month, and is sorted by ascending Close."""

    # compare progress over 13 months because Australia
    # and New Zealand only collect CPI measures quarterly
    inclusive_year = 13  # months
    index = ["Open", "High", "Low", "Close"]
    summary = DataFrame([], index=index)  # return vehicle
    for name in data.columns:
        if name in EXCLUDE:
            continue
        column = data[name]
        latest = column.last_valid_index()
        if latest is None:
            continue  # all-missing column: no latest print to anchor on
        last_valid = cast(pd.Period, latest)  # mypy cast
        year = pd.period_range(end=last_valid, periods=inclusive_year)
        frame = column[year]
        open_ = frame.iloc[0]
        high = frame.max()
        low = frame.min()
        close = frame.iloc[-1]
        key = f"{name} {str(last_valid.year)[2:]}-{last_valid.month:02d}"
        summary[key] = Series([open_, high, low, close], index=index)
    summary = summary.T.sort_values("Close")
    return summary

In [20]:
def plot_ohlc(ohlc_df: DataFrame, horizontal: bool = True, **kwargs) -> None:
    """Plot data in ohlc_df in an open-high-low-close style.

    ohlc_df needs "Open", "High", "Low" and "Close" columns. Each row
    is drawn as a bar from Low to High with one marker at Open and
    another at Close; the row labels go on the category axis.
    horizontal lays the bars out left-to-right (True) or bottom-to-top
    (False). All other keyword arguments are passed to finalise_plot()."""

    def xy(x, y):
        # value/label pair in the argument order the chosen layout needs
        return (x, y) if horizontal else (y, x)

    def set_limits(ax: plt.Axes) -> None:
        # value-axis limits: always include zero, plus 2.5% padding
        minimum = min(0, ohlc_df["Low"].min())  # include zero
        maximum = ohlc_df["High"].max()
        adjustment = (maximum - minimum) * 0.025
        limits = minimum - adjustment, maximum + adjustment
        if horizontal:
            ax.set_xlim(*limits)
        else:
            ax.set_ylim(*limits)

    # canvas
    _, ax = plt.subplots()

    # sort out chart orientation
    good, bad = "darkblue", "darkorange"  # for colour blindness
    bar_method = ax.barh if horizontal else ax.bar
    reference = "left" if horizontal else "bottom"
    range_ = ohlc_df["High"] - ohlc_df["Low"]
    open_marker = "^" if horizontal else "<"
    close_marker = "v" if horizontal else ">"
    # a bar is "good" when the close is below the open
    color = [
        good if first > last else bad
        for first, last in zip(ohlc_df.Open, ohlc_df.Close)
    ]

    # plot
    bar_method(
        ohlc_df.index,
        range_,
        **{reference: ohlc_df["Low"]},
        color=color,
        linewidth=1.0,
        edgecolor="black",
        label="Range of prints through the 13 months",
        alpha=0.15,
    )
    ax.plot(
        *xy(ohlc_df["Open"], ohlc_df.index),
        marker=open_marker,
        linestyle="None",
        label="First print in the 13 months",
        color=good,
        markersize=5,
    )
    ax.plot(
        *xy(ohlc_df["Close"], ohlc_df.index),
        marker=close_marker,
        linestyle="None",
        label="Last print in the 13 months",
        color=bad,
        markersize=5,
    )
    ax.tick_params(axis="both", which="major", labelsize="x-small")
    set_limits(ax=ax)
    if not horizontal:
        # Rotate the existing category labels. Calling set_xticklabels()
        # here triggers a matplotlib UserWarning because the tick
        # positions have not been fixed first.
        ax.tick_params(axis="x", labelrotation=90)
    finalise_plot(ax, **kwargs)

In [21]:
def get_lim(df):
    """Return (lower, upper) limits for the data in df.

    The padding is 2% of the gap between the largest "High" and the
    smallest "Low". The lower limit is the smaller of zero and the
    smallest "Low", less the padding; the upper limit is the largest
    "High" plus the padding."""

    top = df["High"].max()
    bottom = df["Low"].min()
    padding = (top - bottom) * 0.02
    lower = min(0, bottom) - padding
    upper = top + padding
    return lower, upper

In [22]:
class InflationPlotter:
    """Plots recent inflation range for selected nations."""

    # number of charts drawn so far, shared by all instances; its value
    # is passed to finalise_plot() as the tag of each chart
    plot_count = 0

    def plot_selected(self, nations: Sequence, horizontal: bool) -> None:
        """Plot of inflation over the last year - selected nations.

        nations are column labels of the annual CPI growth table.
        horizontal selects the bar layout and, with it, which axis
        carries the label, the target band and the zero line options."""

        summary = get_recent_ohlc(cpi_growth["annual"][nations])

        title_aip = "Annual inflation prints over the most recent year"
        lfooter = (
            "Year and month of latest print in the axis labels. "
            "Range is the 13 months up to and including the latest data. "
        )
        plot_ohlc(
            summary,
            horizontal=horizontal,
            title=title_aip,
            xlim=get_lim(summary) if horizontal else None,
            zero_y=horizontal,
            xlabel="Per cent per year" if horizontal else None,
            ylabel="Per cent per year" if not horizontal else None,
            tag=str(InflationPlotter.plot_count),
            axvspan=TARGET_V if horizontal else None,
            axhspan=TARGET if not horizontal else None,
            y0=not horizontal,
            x0=horizontal,
            legend={"loc": "best", "fontsize": "xx-small"},
            # the OECD resource queried for CPI data is PRICES_CPI
            rfooter=f"{SOURCE} PRICES_CPI",
            lfooter=lfooter,
            show=SHOW,
        )
        InflationPlotter.plot_count += 1


# horizontal chart for a hand-picked list of nations
ip = InflationPlotter()
ip.plot_selected(
    nations=[
        "Australia",
        "Canada",
        "China",
        "France",
        "Germany",
        "India",
        "Indonesia",
        "Italy",
        "Japan",
        "Korea",
        "United Kingdom",
        "United States",
        "New Zealand",
        "Norway",
        "Sweden",
        "Brazil",
        "Spain",
    ],
    horizontal=True,
)

# vertical chart for every column of the annual growth table
# (get_recent_ohlc() drops the nations named in EXCLUDE)
ip.plot_selected(cpi_growth["annual"].columns.to_list(), horizontal=False)
del ip

  ax.set_xticklabels(ohlc_df.index, rotation=90)


In [23]:
def plot_comparative() -> None:
    """Compare inflation across the Anglosphere chart group.

    Draws four charts from START_COVID onwards: annual and
    quarterly-annualised growth, each as raw and as smoothed data."""

    anglo = sorted(location_map[code] for code in chart_sets["anglosphere"])

    # (period description, raw/smooth tag, data) for each chart
    plan = [
        ("Annual", "raw", cpi_growth["annual"][anglo]),
        ("Annual", "smooth", cpi_growth["annual_smooth"][anglo]),
        ("Quarterly annualised", "raw", cpi_growth["q_annualised"][anglo]),
        ("Quarterly annualised", "smooth", cpi_growth["qa_smooth"][anglo]),
    ]
    style = ["--", ":", "-", "-.", "--."] * 2

    for period, tag, frame in plan:
        since = frame[frame.index >= START_COVID]
        axes = since.plot(linewidth=2, style=style)

        # only the smoothed charts carry the moving-average note
        if tag == "smooth":
            lfooter = f"{HENDERSON}-term Henderson moving averages. "
        else:
            lfooter = ""

        finalise_plot(
            axes,
            title=f"{period} inflation trajectories after COVID",
            xlabel=None,
            ylabel="Per cent per year",
            # NOTE(review): "angloshpere" looks like a misspelling of
            # "anglosphere"; left unchanged because it is a runtime string
            pre_tag=f"angloshpere-{tag}",
            axhspan=TARGET,
            legend={"ncols": 2, "loc": "best", "fontsize": "xx-small"},
            rfooter=cpi_rfooter,
            lfooter=f"Anglosphere nations. {lfooter}",
            y0=True,
            show=SHOW,
        )


plot_comparative()

### Selected individual nations

In [24]:
def plot_selected():
    """Chart annual CPI growth from START_COVID onwards for a
    handful of nations, one chart per nation."""

    annual = cpi_growth["annual"]
    for select in ("Brazil", "Chile", "Turkey", "Australia", "New Zealand"):
        series = annual[select]
        recent = series.loc[series.index >= START_COVID]
        ax = recent.plot(linewidth=2)
        finalise_plot(
            ax,
            title=f"{select}: recent inflation trajectory",
            xlabel=None,
            ylabel="Per cent per year",
            axhspan=TARGET,
            pre_tag="national-inflation-",
            rfooter=cpi_rfooter,
            zero_y=True,
            show=SHOW,
        )


plot_selected()

### Australia vs OECD monitored mean

In [25]:
def check():
    """Print how complete the last two rows of the annual CPI growth
    table are, and which nations are missing from the second-last row."""

    annual = cpi_growth["annual"]

    # non-missing values in the second-last row, then in the last row
    for back in (2, 1):
        reporting = annual.iloc[-back].count()
        print(f"Reporting at N-{back} (incl. excludeds): {reporting}")

    remaining = len(annual.columns) - len(EXCLUDE)
    print(f"Total nations after exclusions: {remaining}")

    penultimate = annual.iloc[-2]
    absent = penultimate.index[penultimate.isna()].to_list()
    print(f"Missing at N-2 (including excludeds): {absent}")


check()

# Pick a minimum number of data points before taking an OECD mean
# Note: Australia and NZ only report quarterly
MIN_MEAN_REQUIRED = 38  # allow some wriggle room

Reporting at N-2 (incl. excludeds): 44
Reporting at N-1 (incl. excludeds): 44
Total nations after exclusions: 43
Missing at N-2 (including excludeds): ['Russia']


In [26]:
def cpi_comparative() -> None:
    """Chart Australia against the mean of the monitored nations.

    One chart is drawn for each of the four CPI growth tables (annual
    and quarterly-annualised, raw and trend), from START_COVID onwards
    and without the nations in EXCLUDE. Every nation is drawn as a thin
    line, with the mean and Australia emphasised. The mean is only
    taken over rows with at least MIN_MEAN_REQUIRED values present."""

    datasets = {
        "Annual": cpi_growth["annual"],
        "Trend annual": cpi_growth["annual_smooth"],
        "Quarterly-annualised": cpi_growth["q_annualised"],
        "Trend Quarterly-annualised": cpi_growth["qa_smooth"],
    }

    for title, data in datasets.items():
        footer = (
            ""
            if "Trend" not in title
            else f"{HENDERSON}-term Henderson moving average. "
        )

        frame = data.drop(columns=EXCLUDE)
        frame = frame[frame.index >= START_COVID]
        ax = frame.plot(lw=1, alpha=0.6)
        # drop the automatic one-entry-per-nation legend
        ax.get_legend().remove()

        mean_color, aus_color, lw = "black", "#dd0000", 3.5
        mean = frame.dropna(axis=0, thresh=MIN_MEAN_REQUIRED).mean(axis=1)
        mean.plot(lw=lw, color=mean_color, label="Mean")

        frame["Australia"].plot(lw=lw, color=aus_color, label="Australia")
        # stand-in lines so the legend shows just the two emphasised series
        custom_lines = [
            Line2D([0], [0], color=mean_color, lw=lw),
            Line2D([0], [0], color=aus_color, lw=lw),
        ]
        ax.legend(custom_lines, ["OECD monitored mean", "Australia"], fontsize="small")

        ax.axhline(0, lw=0.75, color="#777777")
        finalise_plot(
            ax,
            title=f"{title} CPI inflation trajectories",
            xlabel=None,
            ylabel="Per cent per year",
            tag="AU-v-OECD",
            lfooter=footer + f'All OECD monitored nations except: {", ".join(EXCLUDE)}',
            # the OECD resource queried for CPI data is PRICES_CPI
            rfooter=f"{SOURCE} PRICES_CPI",
            show=SHOW,
        )


cpi_comparative()

## Finished

In [27]:
# record the run time and the versions of Python and the imported packages
%reload_ext watermark
%watermark -u -n -t -v -iv -w

Last updated: Thu Mar 21 2024 13:45:53

Python implementation: CPython
Python version       : 3.11.8
IPython version      : 8.22.2

matplotlib: 3.8.3
pandas    : 2.2.1
numpy     : 1.26.4

Watermark: 2.4.3



In [28]:
# NOTE(review): this cell repeats the previous cell exactly - consider removing one
%reload_ext watermark
%watermark -u -n -t -v -iv -w

Last updated: Thu Mar 21 2024 13:45:53

Python implementation: CPython
Python version       : 3.11.8
IPython version      : 8.22.2

matplotlib: 3.8.3
pandas    : 2.2.1
numpy     : 1.26.4

Watermark: 2.4.3



In [29]:
# marker that the notebook ran through to its last cell
print("Done")

Done
