In [None]:
import pandas as pd
import requests
import zipfile
import io
import os
import csv
import ctypes as ct
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import re
import seaborn as sns
import QuantLib as ql

from bgs.load_bgs_amounts import load_bgs_amounts
from bgs.load_gilt_details import load_csv_blocks
from bgs.load_bgs_prices import load_prices
from bgs.gilt_analytics import gilt_yield

In [None]:
def clean_date(bgs_index):
    """Parse a BGS date label and normalise it to a month-frequency timestamp.

    Labels shaped like ``"30 Apr 2025"`` are parsed with the explicit
    ``%d %b %Y`` format (values that fail that format are coerced rather
    than raised); every other label goes through pandas' default parser.
    The parsed value is then passed through
    ``to_period('M').to_timestamp('M')``, so the day of month of the input
    is not preserved.
    """
    looks_like_day_mon_year = re.match(r'^\d{2}\s\w{3}\s+\d{4}$', bgs_index)
    if looks_like_day_mon_year:
        parsed = pd.to_datetime(bgs_index, format="%d %b %Y", errors='coerce')
    else:
        parsed = pd.to_datetime(bgs_index)
    return parsed.to_period('M').to_timestamp('M')

def clean_percentage(x):
    """Convert a coupon label into a float.

    Parameters
    ----------
    x : str or number
        * ``'Variable'`` / ``'Floating'`` (surrounding whitespace ignored for
          the comparison) are returned unchanged.
        * Numbers and numeric strings are converted with ``float``.
        * Mixed fractions such as ``"2 1/2"`` become ``2.5``; a bare fraction
          such as ``"3/4"`` becomes ``0.75``.  Any run of whitespace may
          separate the whole part from the fraction.

    Returns
    -------
    float or str
        The numeric coupon, or the original string for variable/floating.

    Raises
    ------
    ValueError
        If a string is neither numeric nor a recognisable fraction.
    TypeError
        If a non-string value cannot be converted by ``float``.
    """
    if isinstance(x, str) and x.strip() in ['Variable', 'Floating']:
        return x
    try:
        return float(x)
    except (ValueError, TypeError):
        # Only strings can still be fraction labels; anything else is a real error.
        if not isinstance(x, str):
            raise

    # split() without an argument tolerates repeated and trailing whitespace.
    parts = x.split()
    if len(parts) == 1:
        units, fractions = "0", parts[0]
    elif len(parts) == 2:
        units, fractions = parts
    else:
        raise ValueError(f"Cannot parse percentage: {x!r}")
    num, denom = map(float, fractions.split('/'))
    return float(units) + num / denom

In [None]:
def _prepare_amounts(table):
    """Return a numeric, month-indexed copy of one BGS amounts table.

    The cleaning steps are the ones this cell previously repeated for every
    table: "Redeemed" markers are blanked, every column is coerced to
    numeric (unparseable cells become NaN), NaN is filled with 0, and the
    index labels are parsed with ``clean_date``.

    The input table is not modified; a new object is returned.
    """
    cleaned = (
        table.replace("Redeemed", "")
        .apply(pd.to_numeric, errors='coerce')
        .fillna(0)
    )
    cleaned.index = pd.Index(
        [clean_date(x) for x in list(cleaned.index)]
    ).to_period('M').to_timestamp('M')
    return cleaned


# `tables` is subscripted by section name below.
tables = load_bgs_amounts("downloads/BGSAmounts.csv")

conv = _prepare_amounts(tables['Conventionals'])
old = _prepare_amounts(tables['Calculated indexed nominal Old-style'])
new = _prepare_amounts(tables['Calculated indexed nominal New-style'])
new_no_idx = _prepare_amounts(tables['Index-linked New-style'])


In [None]:
# Load the price table and re-label its "%d %b %Y" index at month frequency.
price_df = load_prices("downloads/BGSPrices.csv")
price_dates = pd.to_datetime(price_df.index, format="%d %b %Y")
price_df.index = price_dates.to_period('M').to_timestamp('M')

# Status markers become 0, 'missing' becomes a gap that is forward-filled
# down each column, and whatever is still empty afterwards becomes 0.
price_df = (
    price_df
    .replace('Amalgamated', 0)
    .replace('Redeemed', 0)
    .replace('redeemed', 0)
    .replace('missing', None)
    .ffill(axis=0)
    .fillna(0)
)

In [None]:
# Load the gilt details file; `details` is subscripted by section name in later cells.
details = load_csv_blocks("downloads/BGSDetails.csv")

# Show the available section names.
details.keys()

In [None]:

# Pull out the three detail tables used in this notebook ...
conv_details = details['Conventionals']
new_details = details['Index-Linked New-style']
old_details = details['Index-Linked Old-style']

# ... and convert each coupon column ('%') with clean_percentage.
for detail_frame in (conv_details, new_details, old_details):
    detail_frame['%'] = detail_frame['%'].apply(clean_percentage)


In [None]:
# As-of date used to select rows from the amounts and price tables below.
last = '2025-04-30'

In [None]:
# Column labels of `new` whose value on the `last` row is non-zero.
# reset_index() exposes those labels in a column named 'index', which is listed.
linkers = new.loc[last, new.loc[last] !=0].reset_index()['index'].to_list()
# Peek at the first label.
linkers[0]

In [None]:
# Prices on `last` for the selected linkers; price_df columns are looked up
# by the int form of each label.
last_price = price_df.loc[last, [int(x) for x in linkers]]
# Amounts from `new` on the same date, looked up with the original labels.
principal = new.loc[last, linkers]

In [None]:
# Cast the labels to int so they line up with the int labels used for last_price.
principal.index = principal.index.astype(int)
# Series names become the column names after reset_index() in the merge below.
principal.name = 'balance_outstanding'
last_price.name = 'price'

In [None]:
# One row per linker: price and outstanding balance joined on the shared
# label column, which is then renamed to 'Sequence'.
data_set_april = (
    last_price.reset_index()
    .merge(principal.reset_index(), on='index')
    .rename(columns={'index': 'Sequence'})
)

In [None]:
# Average price weighted by balance outstanding.
(data_set_april['balance_outstanding']*data_set_april['price']).sum()/data_set_april['balance_outstanding'].sum()

In [None]:
# Preview the first rows of the merged price/balance frame.
data_set_april.head()

In [None]:
# Cast 'Sequence' to int so it can be compared and merged with the int
# 'Sequence' values in data_set_april.
new_details['Sequence'] = new_details['Sequence'].astype(int)

In [None]:
# Number of detail rows whose 'Sequence' is one of the selected linkers.
# isin() builds the lookup once instead of rebuilding the int list per row.
int(new_details['Sequence'].isin([int(y) for y in linkers]).sum())

In [None]:
# Number of selected linkers, for comparison with the match count above.
len(linkers)

In [None]:
# List the available detail columns.
new_details.columns

In [None]:
# Detail columns to attach to each linker; the date columns are listed
# separately because they are parsed after the merge.
date_variables = ["Latest redemption date","Issue date","First coupon payable on date",]
static_variables = ['%', "Frequency","Sequence",'ISIN Code']+date_variables
# The right-hand side is indexed by 'Sequence'; `on='Sequence'` matches the
# left column against that index level. The inner join keeps only linkers
# that have a details row.
# NOTE(review): data_set_april is rebuilt from itself, so re-running this
# cell merges the detail columns a second time.
data_set_april = data_set_april.merge(new_details[static_variables].set_index("Sequence"), on='Sequence', how='inner')
for date_var in date_variables:
    # NOTE(review): clean_date passes values through
    # to_period('M').to_timestamp('M'), so the day of month is not kept —
    # confirm this is acceptable for the yield calculation further down.
    data_set_april[date_var] = data_set_april[date_var].apply(clean_date)
data_set_april.head()

In [None]:
# Average coupon ('%') weighted by balance outstanding.
(data_set_april['balance_outstanding']*data_set_april['%']).sum()/data_set_april['balance_outstanding'].sum()

In [None]:
# Histogram of balance outstanding per linker.
data_set_april['balance_outstanding'].plot(kind='hist')

In [None]:
# Applies the seaborn theme globally, so later plots are affected as well.
sns.set_theme(style="darkgrid")

# Bubble chart: price against redemption date, with marker size scaled by
# balance outstanding (mapped onto the 10-500 size range).
ax = sns.scatterplot(
    data=data_set_april,
    x="Latest redemption date",
    y="price",
    size="balance_outstanding",
    legend=False,
    sizes=(10, 500)
)

# Render the figure.
plt.show()

In [None]:
# Comparison dates in 2022, used to select rows of `new` and `price_df` below.
august = "2022-08-31"
september = "2022-09-30"
december = "2022-12-31"

In [None]:
# Column labels of `new` with a non-zero value on the August 2022 row.
liz_linkers  = new.loc[august, new.loc[august] !=0].reset_index()['index'].to_list()

In [None]:
# Prices of the August-2022 linker set on each comparison date; the int
# labels used by price_df are built once and shared by all three lookups.
liz_ids = [int(x) for x in liz_linkers]
august_price, september_price, december_price = (
    price_df.loc[month_end, liz_ids]
    for month_end in (august, september, december)
)

In [None]:
# Name each snapshot so the names become column headers in the concat below.
# NOTE(review): this overwrites the 'price' name given to last_price earlier;
# re-running the earlier merge cell after this one would yield an 'Apr-2025'
# column instead of 'price'.
last_price.name='Apr-2025'
august_price.name='Aug-2022'
september_price.name='Sep-2022'
december_price.name='Dec-2022'

In [None]:
# Side-by-side price snapshots, aligned on the linker labels (one column per date).
history = pd.concat([last_price, august_price, september_price, december_price], axis=1)

In [None]:
# Treat zero prices as missing: the price-cleaning cell writes 0 for
# redeemed/amalgamated entries and for gaps left after forward-filling.
history = history.replace(0.000, np.nan)
history

In [None]:
# Turn the linker labels into a 'Sequence' column and attach each linker's
# redemption date; the left join keeps linkers that have no details row.
history = history.reset_index().rename(columns={'index':'Sequence'})
history = history.merge(new_details[['Sequence','Latest redemption date']], on='Sequence', how='left')

In [None]:
# Parse the redemption dates, index the snapshots by them and drop the
# 'Sequence' identifier, which is no longer needed.
history = (
    history
    .assign(**{'Latest redemption date': history['Latest redemption date'].apply(clean_date)})
    .set_index('Latest redemption date')
    .drop(columns=['Sequence'])
)


In [None]:
# Wide-form scatter: each snapshot column is drawn as its own series against
# the redemption-date index.
sns.scatterplot(data=history[['Aug-2022','Sep-2022','Dec-2022','Apr-2025']])

In [None]:
# Sequence numbers singled out for inspection — presumably the higher-priced
# points in the chart above; TODO confirm how they were chosen.
higher_prices = [55360,55380, 55465,55485, 55500]

In [None]:
# Rows for the hand-picked sequence numbers.
data_set_april.loc[data_set_april['Sequence'].isin(higher_prices)]

In [None]:
# Row(s) with the earliest issue date.
data_set_april.loc[data_set_april['Issue date'] == data_set_april['Issue date'].min()]

In [None]:
# Price history of column 55500 from 2005-09-30 onwards.
price_df[55500]['2005-09-30':].plot()

In [None]:
# Row(s) with the latest redemption date.
data_set_april.loc[data_set_april['Latest redemption date'] == data_set_april['Latest redemption date'].max()]

In [None]:
from bgs.linker_analytics import linker_real_yield


# RPI series from the ONS download. Row 7 of the file is consumed as the
# header and replaced by the two column names given here.
df_ons_rpi = pd.read_csv("downloads/ONSRPI.csv", header=7, names=["date", "RPI"])
# Keep everything from the "1987 JAN" row onwards. The row label is used as
# a position, which holds because read_csv produces a default 0..n-1 index.
monthly_start = df_ons_rpi[df_ons_rpi["date"] == "1987 JAN"].index[0]
ONS_RPI_INDEX = df_ons_rpi.iloc[monthly_start:].copy()

# Inflation spot curve. The workbook is opened in a `with` block so its file
# handle is closed as soon as the sheet has been read.
with pd.ExcelFile("downloads/GLC Inflation month end data_2025 to present.xlsx") as df_infl:
    DF_SPOT = pd.read_excel(
        df_infl, sheet_name="4. spot curve", header=3, skiprows=[4]
    ).set_index("years:")
# Add a 50 column by repeating the 40 column's values.
DF_SPOT[50] = DF_SPOT[40]

def get_real_yield(row):
    """Compute the real yield of one linker row via ``linker_real_yield``.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``"Issue date"``, ``"First coupon payable on date"`` and
        ``"Latest redemption date"`` (objects with ``strftime``), ``"%"``
        (coupon, divided by 100 before use) and ``"price"`` (passed as the
        clean price).

    Returns
    -------
    The value returned by ``linker_real_yield``, rounded to 8 decimals.

    Raises
    ------
    IndexError
        If ``DF_SPOT`` has no row for the month end preceding ``last``.

    Notes
    -----
    Reads the module-level ``last``, ``DF_SPOT`` and ``ONS_RPI_INDEX``.
    """
    issue_dt = row["Issue date"].strftime("%d/%m/%Y")
    first_coupon_dt = row["First coupon payable on date"].strftime("%d/%m/%Y")
    maturity_dt = row["Latest redemption date"].strftime("%d/%m/%Y")
    coupon = row["%"]
    clean_price = row["price"]

    # Hand the helper its own copy of the RPI table — presumably defensive;
    # whether linker_real_yield modifies its input is not visible here.
    monthly_rpi_index = ONS_RPI_INDEX.copy()

    # The curve row used is the month end strictly before `last`.
    previous_month_end = (
        pd.to_datetime(last, format="%Y-%m-%d") + pd.offsets.MonthEnd(-1)
    ).strftime("%Y-%m-%d")
    curve_row = DF_SPOT.loc[DF_SPOT.index == previous_month_end]
    if curve_row.empty:
        # Same exception type as the former bare `value[0]` failure, but
        # with a message that says what is actually missing.
        raise IndexError(
            f"No inflation spot curve row for {previous_month_end}"
        )
    infl_curve = curve_row.to_dict(orient="list")

    # Only whole-year columns are turned into quotes; fractional tenors are skipped.
    inflation_quotes = [
        (ql.Period(int(key), ql.Years), value[0])
        for key, value in infl_curve.items()
        if float(key).is_integer()
    ]

    today = ql.Date(last, "yyyy-MM-dd")
    issue_date = ql.Date(issue_dt, "dd/MM/yyyy")
    first_coupon_date = ql.Date(first_coupon_dt, "dd/MM/yyyy")
    maturity_date = ql.Date(maturity_dt, "dd/MM/yyyy")
    notional = 100
    fixed_rates = [coupon/100]

    r = linker_real_yield(
        trade_date=today,
        ons_rpi_idx=monthly_rpi_index,
        settlement_days=1,
        inflation_quotes=inflation_quotes,
        notional=notional,
        issue_date=issue_date,
        maturity_date=maturity_date,
        fixedRates=fixed_rates,
        clean_price=clean_price,
        first_coupon_date=first_coupon_date,
    )

    return round(r,8)

In [None]:
# Row-wise real yield for every linker (one get_real_yield call per row).
data_set_april['real_yield']=data_set_april.apply(get_real_yield, axis=1)

In [None]:
# Line plot of real yield against redemption date.
# NOTE(review): rows are drawn in frame order; the frame is not sorted by date here.
data_set_april.plot(x='Latest redemption date', y='real_yield')

In [None]:
# Real yield against issue date, with marker size mapped to the redemption date.
sns.scatterplot(
    data=data_set_april,
    x="Issue date",
    y="real_yield",
    size="Latest redemption date",
    legend=False,)


In [None]:
# Row(s) with the highest real yield.
data_set_april.loc[data_set_april['real_yield'] == data_set_april['real_yield'].max()]

In [None]:
# Linkers redeeming after 2051-03-31 (the same cut-off the LME helpers use below).
data_set_april[data_set_april['Latest redemption date'] > '2051-03-31']

In [None]:
# Five rows with the highest coupon ('%').
data_set_april.sort_values(by='%', ascending=False).head()

In [None]:
# Total balance outstanding of linkers priced below 80.
data_set_april[data_set_april['price']<80]['balance_outstanding'].sum()

In [None]:
def calculate_lme_balance(row, price_threshold=80):
    """Return the scenario balance for one row.

    Parameters
    ----------
    row : mapping-like
        Must provide ``'price'`` and ``'balance_outstanding'``.
    price_threshold : number, default 80
        Rows priced strictly above this keep their balance unchanged.

    Returns
    -------
    ``balance_outstanding`` when ``price > price_threshold``; otherwise
    ``balance_outstanding * price / 100``.
    """
    if row['price'] > price_threshold:
        return row['balance_outstanding']
    # At or below the threshold the balance is scaled by price / 100.
    return row['balance_outstanding'] * row['price'] / 100

def calculate_lme_real_yield(row, cutoff='2051-03-31', replacement_yield=0.0215):
    """Return the scenario real yield for one row.

    Parameters
    ----------
    row : mapping-like
        Must provide ``'Latest redemption date'`` (comparable with a pandas
        Timestamp) and ``'real_yield'``.
    cutoff : str or datetime-like, default '2051-03-31'
        Rows redeeming strictly after this date get ``replacement_yield``.
    replacement_yield : float, default 0.0215
        Yield assigned to rows beyond the cut-off.

    Returns
    -------
    ``replacement_yield`` for rows redeeming after ``cutoff``; otherwise the
    row's own ``'real_yield'``.
    """
    if row['Latest redemption date'] > pd.Timestamp(cutoff):
        return replacement_yield
    return row['real_yield']
    
def calculate_lme_coupon(row, cutoff='2051-03-31', replacement_coupon=2.15):
    """Return the scenario coupon for one row.

    Parameters
    ----------
    row : mapping-like
        Must provide ``'Latest redemption date'`` (comparable with a pandas
        Timestamp) and ``'%'``.
    cutoff : str or datetime-like, default '2051-03-31'
        Rows redeeming strictly after this date get ``replacement_coupon``.
    replacement_coupon : float, default 2.15
        Coupon, on the same scale as the ``'%'`` column, assigned to rows
        beyond the cut-off.

    Returns
    -------
    ``replacement_coupon`` for rows redeeming after ``cutoff``; otherwise the
    row's own ``'%'``.
    """
    if row['Latest redemption date'] > pd.Timestamp(cutoff):
        return replacement_coupon
    return row['%']

# Apply the three scenario helpers row by row and store the results as new columns.
data_set_april['lme_balance'] = data_set_april.apply(calculate_lme_balance, axis=1)
data_set_april['lme_real_yield'] = data_set_april.apply(calculate_lme_real_yield, axis=1)
data_set_april['lme_coupon'] = data_set_april.apply(calculate_lme_coupon, axis=1)

In [None]:
# Inflation rate applied to balances — presumably an assumed annual rate of
# 3.5%; TODO confirm the source of this figure.
infl = 0.035
# Current position: uplift = balance * infl, cash interest = balance * coupon / 100.
data_set_april['uplift'] = data_set_april['balance_outstanding']*infl
data_set_april['cash_interest'] = data_set_april['balance_outstanding']*data_set_april['%']/100

# Scenario position: same formulas on the lme_ balance and coupon columns.
data_set_april['new_uplift'] = data_set_april['lme_balance']*infl
data_set_april['new_cash_interest'] = data_set_april['lme_balance']*data_set_april['lme_coupon']/100

# Total (uplift + cash interest): current first, then scenario.
print(data_set_april['uplift'].sum() + data_set_april['cash_interest'].sum(), data_set_april['new_uplift'].sum() + data_set_april['new_cash_interest'].sum())

In [None]:
# Calendar buckets: redemption year, its decade (floored to a multiple of 10)
# and issue year.
data_set_april['mat_year'] = data_set_april['Latest redemption date'].dt.year
data_set_april['mat_decade'] = (data_set_april['mat_year'] // 10 * 10)
data_set_april['issue_year'] = data_set_april['Issue date'].dt.year
# Scenario minus current, per component and in total.
data_set_april['uplift_chg'] = data_set_april['new_uplift'] - data_set_april['uplift']
data_set_april['cash_interest_chg'] = data_set_april['new_cash_interest'] - data_set_april['cash_interest']
data_set_april['interest_chg'] = data_set_april['cash_interest_chg'] + data_set_april['uplift_chg']

In [None]:
# Current uplift, scenario uplift and their difference, per linker.
data_set_april[['uplift','new_uplift','uplift_chg']]

In [None]:
# Sum each cost metric within a maturity decade and show them as grouped bars.
decade_metrics = dict.fromkeys(
    ['uplift', 'cash_interest', 'new_uplift', 'new_cash_interest', 'interest_chg'],
    'sum',
)
data_set_april.groupby('mat_decade').agg(decade_metrics).plot(
    kind='bar', title='Aggregated Financial Metrics by Maturity Decade'
)