In [19]:
import pandas as pd
import numpy as np
import FUN_ukmrio as uk
import ukmrio_alec_funk_2 as nowcast
df = pd.DataFrame
import pickle
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from tqdm import tqdm
import re
import statsmodels.api as sm

# Upload nowcast data
# NOTE(security): pickle can execute arbitrary code while loading; only open
# pickle files that were produced by this project.
with open("disaggregate_fd.p", "rb") as fh:  # context manager closes the handle
    row_splits = pickle.load(fh)

# Growth-rate tables, indexed by their "Region" column.
gdp_rates = pd.read_csv("data/inputs/gdp_growth.csv").set_index('Region')
export_rates = pd.read_csv("data/inputs/export_growth.csv").set_index('Region')
import_rates = pd.read_csv("data/inputs/import_growth.csv").set_index('Region')

# First CSV column becomes the index.
deflators = pd.read_csv("deflators_2025.csv", index_col=0)

# Header-less label files flattened to plain Python lists
# (squeeze("columns") expects a single-column file).
full_labels = pd.read_csv("data/MRIOT/UKMRIO_full_labels.csv", header=None).squeeze("columns").tolist()
countries   = pd.read_csv("data/MRIOT/UKMRIO_country_labels.csv", header=None).squeeze("columns").tolist()

In [20]:
# Load the pickled base-year inputs that are handed to the nowcasting pipelines.
# presumably t / y / va are the transactions, final demand and value-added
# tables -- TODO confirm against the script that writes these files.
# NOTE(security): only unpickle files produced by this project.
t = pd.read_pickle("t.p")  # pass the path so pandas opens and closes the file itself
with open("y.p", "rb") as fh:
    y = pickle.load(fh)
with open("va.p", "rb") as fh:
    va = pickle.load(fh)

Method 2 - MRIO Nowcast

In [21]:
# Projection settings: the pipeline is asked to go `lag` years past `base_year`.
base_year = 2019
lag = 3
projected_year = base_year + lag

print(f"\n-- Projecting {projected_year} from base year {base_year} --")

# Positional inputs, in the order the Method 2 pipeline receives them.
method2_inputs = (t, y, countries, row_splits, gdp_rates, export_rates, import_rates)
y_proj, t_proj, va_proj = nowcast.nowcasting_pipeline_method2(
    *method2_inputs, base_year=base_year, lag=lag, verbose=True
)

# Wrap each projected table in a one-entry dict keyed by the projected year.
y_nowcast, t_nowcast, va_nowcast = (
    {projected_year: projection} for projection in (y_proj, t_proj, va_proj)
)

print(f"\n-- Finished projecting {projected_year} --")



-- Projecting 2022 from base year 2019 --

Column and Row Sum Balance? Average difference across all regions = 0.00

GDP Consistency Check for UK
Region GDP (per value added) = 2,084,822.71
Region GDP (per C + BoP)     = 2,084,822.71

--- GDP projection for UK ---
Base: 2,084,822.71
Year 2020 growth rate: -10.00%
Year 2021 growth rate: 9.00%
Year 2022 growth rate: 5.00%
Final projected: 2,147,471.64

--- Exports projection for UK ---
Base: 1,054,574.56
Year 2020 growth rate: -11.79%
Year 2021 growth rate: 3.23%
Year 2022 growth rate: 12.57%
Final projected: 1,080,951.34

--- Imports projection for UK ---
Base: 750,320.98
Year 2020 growth rate: -15.88%
Year 2021 growth rate: 5.81%
Year 2022 growth rate: 12.98%
Final projected: 754,501.94

--- UK Demand Projection Comparison ---
Yrr (domestic demand)          | Base: 1,529,055.61 | Nowcast: 1,568,107.22 | YoY: 2.55%
EXr (foreign demand)           | Base: 1,054,574.56 | Nowcast: 1,080,951.34 | YoY: 2.50%
ADr (aggregate demand)         | 

Method 3 - EEMRIO Nowcast

In [22]:
# Read the GDP-contribution table, relabel its columns as integers and force
# every cell to numeric (cells that cannot be parsed become NaN).
contrib_gdp_raw = pd.read_csv("data/inputs/gdp_cont.csv", index_col=0)
# errors="raise": a non-numeric column header aborts here instead of passing through.
year_labels = pd.to_numeric(contrib_gdp_raw.columns, errors="raise").astype(int)
contrib_gdp = (
    contrib_gdp_raw
    .set_axis(year_labels, axis=1)
    .apply(pd.to_numeric, errors="coerce")
)

In [23]:
# Projection settings: the pipeline is asked to go `lag` years past `base_year`.
base_year = 2019
lag = 3
projected_year = base_year + lag

print(f"\n-- Projecting {projected_year} from base year {base_year} --")

# Positional inputs, in the order the Method 3 pipeline receives them
# (the Method 2 inputs plus the GDP-contribution table).
method3_inputs = (
    t, y, countries, row_splits,
    gdp_rates, export_rates, import_rates, contrib_gdp,
)
y_proj, t_proj, va_proj = nowcast.nowcasting_pipeline_method3(
    *method3_inputs, base_year=base_year, lag=lag, verbose=True
)

# Wrap each projected table in a one-entry dict keyed by the projected year.
# NOTE: this rebinds y_nowcast / t_nowcast / va_nowcast, so whatever those
# names held before this cell ran is no longer reachable through them.
y_nowcast, t_nowcast, va_nowcast = (
    {projected_year: projection} for projection in (y_proj, t_proj, va_proj)
)

print(f"\n-- Finished projecting {projected_year} --")



-- Projecting 2022 from base year 2019 --

Column and Row Sum Balance? Average difference across all regions = 0.00

GDP Consistency Check for UK
Region GDP (per value added) = 2,084,822.71
Region GDP (per C + BoP)     = 2,084,822.71

--- GDP projection for UK ---
Base: 2,084,822.71
Year 2020 growth rate: -10.00%
Year 2021 growth rate: 9.00%
Year 2022 growth rate: 5.00%
Final projected: 2,147,471.64

--- Exports projection for UK ---
Base: 1,054,574.56
Year 2020 growth rate: -11.79%
Year 2021 growth rate: 3.23%
Year 2022 growth rate: 12.57%
Final projected: 1,080,951.34

--- Imports projection for UK ---
Base: 750,320.98
Year 2020 growth rate: -15.88%
Year 2021 growth rate: 5.81%
Year 2022 growth rate: 12.98%
Final projected: 754,501.94

--- UK Demand Projection Comparison ---
Yrr (domestic demand)          | Base: 1,529,055.61 | Nowcast: 1,568,107.22 | YoY: 2.55%
EXr (foreign demand)           | Base: 1,054,574.56 | Nowcast: 1,080,455.27 | YoY: 2.45%
ADr (aggregate demand)         | 

Inflation Steps
- Inflate base years for Method 1
- Inflate nowcasted years for Methods 2 and 3

In [24]:
# Benchmark results and price deflators used for the inflation adjustment.
# NOTE(security): only unpickle files produced by this project.
benchmark_multipliers = pd.read_pickle("UK_multipliers.p")  # path form: pandas closes the file
with open("UK_footprint.p", "rb") as fh:
    benchmark_foot = pickle.load(fh)
deflators = pd.read_csv("deflators_2025.csv", index_col=0)

# Positional relabelling: multiplier rows take the deflator column labels.
# assumes both are in the same order and of equal length -- TODO confirm
# (pandas raises on a length mismatch but cannot detect a different ordering).
benchmark_multipliers.index = deflators.columns

# Rebase the deflator table once per candidate base year: every row is divided
# by the row of year `b`, so that row becomes 1.0 in the rebased table.
base = range(2014, 2020)
base_deflators = {}

for b in base:
    base_deflators[b] = deflators.div(deflators.loc[b])

# Stack the rebased tables under a (base_year, year) MultiIndex.
multi_base_deflators = pd.concat(base_deflators, names=["base_year", "year"])

In [25]:
base_year = 2019
LAG = 3
projected_year = base_year + LAG

# Relabel copies with integer labels so the loaded tables keep their own labels.
mult = benchmark_multipliers.copy()
mult.columns = mult.columns.astype(int)

defl = deflators.copy()
defl.index = defl.index.astype(int)

# Base-year deflator divided by projected-year deflator, aligned to the multiplier rows.
factor = defl.loc[base_year].div(defl.loc[projected_year]).reindex(mult.index)

# multiplier_base_year expressed in projected_year prices
adjusted_multipliers = mult[base_year].mul(factor).to_frame(name=projected_year)

Emissions Nowcast

In [26]:
# Load the emissions pickle (indexed by year below) and lay it out as one column per year.
# NOTE(security): only unpickle files produced by this project.
with open("ghg.p", "rb") as fh:  # context manager closes the handle
    ghg = pickle.load(fh)
allyears = np.arange(2000, 2023)  # 2000..2022 inclusive
# `yr` rather than `y`, so the comprehension does not read like it touches the
# global `y` loaded earlier in the notebook.
ghg_df = pd.DataFrame({yr: ghg[yr].squeeze() for yr in allyears})
# Row labels are taken positionally from the first year.
# assumes every year shares that row order -- TODO confirm.
ghg_df.index = ghg[allyears[0]].index

# Region names that contain spaces and therefore cannot be recovered by taking
# the first word of a label.
MULTIWORD_REGIONS = [
    "Rest of the World",
    "Rest of the OECD",
    "South Africa",
]


def get_region(label: str, multi=MULTIWORD_REGIONS) -> str:
    """Extract the region name that prefixes a row/column label.

    Leading/trailing whitespace is removed and inner whitespace runs are
    collapsed to single spaces. The label is then compared case-insensitively
    against each entry of ``multi``, in order; the first entry the label starts
    with is returned exactly as spelled in ``multi``. If no entry matches, the
    text before the first space is returned.
    """
    normalised = re.sub(r"\s+", " ", label.strip())
    folded = normalised.lower()
    matched = next((name for name in multi if folded.startswith(name.lower())), None)
    if matched is not None:
        return matched
    # No known multi-word region: fall back to the first token.
    return normalised.partition(" ")[0]

# Sum emissions by region: each row label is mapped to its region prefix.
regions = ghg_df.index.to_series().apply(get_region)
# sort_index() returns a new frame, so its result has to be kept; a bare
# `annual_ghg.sort_index()` statement in the middle of a cell does nothing.
annual_ghg = ghg_df.groupby(regions).sum().sort_index()
regions_to_plot = list(annual_ghg.index)

# Region codes used in the growth-rate CSVs -> region names used in the labels.
REGION_CODE_TO_NAME = {
    'BRA': 'Brazil', 'CHI': 'China', 'IND': 'India', 'JAP': 'Japan',
    'RUS': 'Russia', 'SA': 'South Africa', 'RoW': 'Rest of the World',
    'OECD': 'Rest of the OECD'}


def _load_growth_rates(path):
    """Read a growth-rate CSV and return it indexed by full region name.

    Region codes are replaced via ``REGION_CODE_TO_NAME``, all-digit column
    labels are converted to ``int``, a ``2000`` column is dropped if present,
    and rows are sorted by region.
    """
    rates = pd.read_csv(path)
    rates['Region'] = rates['Region'].replace(REGION_CODE_TO_NAME)
    rates = rates.set_index('Region')
    rates.columns = [int(c) if str(c).isdigit() else c for c in rates.columns]
    # errors="ignore": a file without a 2000 column is accepted as-is.
    rates = rates.drop(columns=[2000], errors="ignore").sort_index()
    rates.index.name = "Region"
    return rates


# NOTE: these rebind the `gdp_rates` / `import_rates` tables loaded at the top
# of the notebook (there indexed by the raw region codes). Re-running the
# Method 2 / Method 3 cells after this cell would hand them the relabelled tables.
gdp_rates = _load_growth_rates("data/inputs/gdp_growth.csv")
import_rates = _load_growth_rates("data/inputs/import_growth.csv")

In [None]:
# Settings for the emissions regression nowcast below.
# When True, GHG growth rates are scaled by 100 before fitting and predicted
# rates are divided by 100 before being compounded.
rates_are_percent = True
# Number of years between the base year and the projected year.
LAG = 3
base_year = 2019
projected_year = base_year + LAG
# Training years are limited to min(TRAIN_CUTOFF, base_year); with these values
# the effective limit is base_year.
TRAIN_CUTOFF = 2022

def _int_cols(df):
    out = df.copy()
    out.columns = [int(c) if str(c).isdigit() else c for c in out.columns]
    return out

# Per-region OLS of GHG growth on GDP growth and import growth, fitted on the
# training years and then used to compound the base-year emissions level
# forward to `projected_year`.

# Normalise the three input tables: integer year columns, rows sorted by region.
ghg_levels = _int_cols(annual_ghg)
if "Region" in ghg_levels.columns:
    ghg_levels = ghg_levels.set_index("Region")
ghg_levels = ghg_levels.sort_index()

gdp_rates    = _int_cols(gdp_rates).sort_index()
import_rates = _int_cols(import_rates).sort_index()

# Year-on-year GHG growth along the year axis (first year is NaN by construction).
ghg_rates = ghg_levels.pct_change(axis=1)
if rates_are_percent:
    ghg_rates *= 100.0

# Only regions present in all three tables can be fitted.
regions = sorted(set(gdp_rates.index) & set(import_rates.index) & set(ghg_levels.index))

years_needed = list(range(projected_year - (LAG - 1), projected_year + 1))  # e.g. [t-2, t-1, t] for LAG=3

years_pred = sorted(set(gdp_rates.columns) & set(import_rates.columns))
years_fit  = sorted(set(years_pred) & set(ghg_rates.columns))
years_train = [yr for yr in years_fit if isinstance(yr, (int, np.integer)) and yr <= min(TRAIN_CUTOFF, base_year)]

# Minimum number of complete observations required before a region is fitted.
MIN_FIT_OBS = 5

pred_rates_needed = pd.DataFrame(index=regions, columns=years_needed, dtype=float)
pred_level_proj   = pd.Series(index=regions, dtype=float, name=projected_year)
rows_summary      = []
skipped_regions   = {}  # region -> reason it has no fit and/or no projection

for region in regions:
    if not years_train:
        skipped_regions[region] = "no training years"
        continue

    # need observed base level
    if base_year not in ghg_levels.columns:
        skipped_regions[region] = f"no GHG level column for base year {base_year}"
        continue
    base_val = ghg_levels.loc[region, base_year]
    if not np.isfinite(base_val):
        skipped_regions[region] = "base-year GHG level is not finite"
        continue

    # FIT on training years
    try:
        x_g = gdp_rates.loc[region, years_train].astype(float).values
        x_m = import_rates.loc[region, years_train].astype(float).values
        y_r = ghg_rates.loc[region, years_train].astype(float).values
    except KeyError:
        skipped_regions[region] = "training data missing"
        continue

    # Keep only years where the response and both regressors are finite.
    m_fit = np.isfinite(x_g) & np.isfinite(x_m) & np.isfinite(y_r)
    if m_fit.sum() < MIN_FIT_OBS:
        skipped_regions[region] = f"fewer than {MIN_FIT_OBS} complete training observations"
        continue

    # has_constant="add": always prepend the intercept column. The default
    # ("skip") returns the matrix unchanged when a column is already constant,
    # which would change the number of fitted parameters.
    X_fit = sm.add_constant(np.column_stack([x_g[m_fit], x_m[m_fit]]), has_constant="add")
    y_fit = y_r[m_fit]
    mdl = sm.OLS(y_fit, X_fit).fit()

    # In-sample fit diagnostics.
    yhat_fit = mdl.predict(X_fit)
    mae_fit = float(np.mean(np.abs(y_fit - yhat_fit)))

    rows_summary.append({
        "Region": region,
        "n_obs": int(y_fit.size),
        "R2": float(mdl.rsquared),
        "MAE": mae_fit,
        "Intercept": float(mdl.params[0]),
        "beta_gdp": float(mdl.params[1]),
        "beta_imp": float(mdl.params[2]),
    })

    # PREDICT rates only for years needed
    if not set(years_needed).issubset(set(years_pred)):
        skipped_regions[region] = "predictor columns missing for the projection years"
        continue

    xg_need = gdp_rates.loc[region, years_needed].astype(float).values
    xm_need = import_rates.loc[region, years_needed].astype(float).values
    m_pred  = np.isfinite(xg_need) & np.isfinite(xm_need)
    if not m_pred.all():
        skipped_regions[region] = "non-finite predictor in the projection years"
        continue

    # Same reason as above, and essential here: with few rows (a single row
    # when LAG == 1, or identical rates in every projection year) the default
    # would drop the intercept column and predict() would fail on the shape.
    X_need = sm.add_constant(np.column_stack([xg_need, xm_need]), has_constant="add")
    r_need = mdl.predict(X_need)
    pred_rates_needed.loc[region, years_needed] = r_need

    # Compound the predicted yearly rates onto the observed base-year level.
    growth_factors = 1.0 + (r_need / 100.0 if rates_are_percent else r_need)
    pred_level_proj.loc[region] = float(base_val) * float(np.prod(growth_factors))

# Summary table
# Explicit columns keep "Region" available even when no region could be fitted.
summary_columns = ["Region", "n_obs", "R2", "MAE", "Intercept", "beta_gdp", "beta_imp"]
summary_df = (
    pd.DataFrame(rows_summary, columns=summary_columns)
      .set_index("Region")
      .sort_values("R2", ascending=False)
)

# Projected level next to the observed level (NaN when the year is not observed).
pred_actual_df = (
    pd.DataFrame({
        "Region": regions,
        "Year": projected_year,
        "Predicted": pred_level_proj.reindex(regions).values,
        "Actual": ghg_levels[projected_year].reindex(regions).values if projected_year in ghg_levels.columns else np.nan
    })
    .sort_values(["Region", "Year"])
    .reset_index(drop=True)
)

if skipped_regions:
    print(f"Regions without a projection: {skipped_regions}")

print(f"\n-- Finished projecting {projected_year} from base year {base_year} --")



-- Finished projecting 2022 from base year 2019 --


In [28]:
# Display the per-region predicted vs. actual table built in the previous cell.
pred_actual_df

Unnamed: 0,Region,Year,Predicted,Actual
0,Brazil,2022,1190032.0,1149297.0
1,China,2022,14130990.0,14699950.0
2,EU,2022,2948355.0,2893812.0
3,India,2022,3710979.0,3606197.0
4,Japan,2022,1120051.0,1095561.0
5,Rest of the OECD,2022,3127481.0,3067599.0
6,Rest of the World,2022,12710540.0,12193130.0
7,Russia,2022,2276885.0,2250468.0
8,South Africa,2022,614356.6,482390.9
9,UK,2022,371709.0,381276.4
