In [None]:
import pandas as pd
import requests
import zipfile
import io
import os
import csv
import ctypes as ct
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import re
import seaborn as sns

from bgs.load_bgs_amounts import load_bgs_amounts
from bgs.load_gilt_details import load_csv_blocks
from bgs.load_bgs_prices import load_prices

def clean_date(bgs_index):
    """Normalise a BGS date label to the month-end timestamp of its month.

    Parameters
    ----------
    bgs_index : str or datetime-like
        Either a string such as ``"30 Apr 2025"`` (two-digit day, three-letter
        month, four-digit year), any other string ``pd.to_datetime`` can parse,
        or a value that is already datetime-like (e.g. ``pd.Timestamp``).

    Returns
    -------
    pd.Timestamp
        The parsed date converted to a monthly period and back to a timestamp
        with ``to_timestamp('M')``.
    """
    # Only strings can be tested against the regex: re.match raises TypeError on
    # a Timestamp, which broke re-running a cell whose column had already been
    # converted by this function.
    if isinstance(bgs_index, str) and re.match(r'^\d{2}\s\w{3}\s+\d{4}$', bgs_index):
        # errors='coerce' turns a label that matches the pattern but is not a
        # real date (e.g. an unknown month abbreviation) into NaT.
        parsed = pd.to_datetime(bgs_index, format="%d %b %Y", errors='coerce')
    else:
        parsed = pd.to_datetime(bgs_index)
    return parsed.to_period('M').to_timestamp('M')

def clean_percentage(x):
    """Convert a coupon label to a float where possible.

    Parameters
    ----------
    x : str or any
        A coupon as text: a plain number (``"4.25"``), a mixed fraction
        (``"2 1/2"``), a bare fraction (``"1/2"``), or one of the markers
        ``"Variable"`` / ``"Floating"``. Non-string values (for example floats
        produced by an earlier run of the same cell) are accepted as well.

    Returns
    -------
    float, or the input unchanged
        The numeric coupon. ``"Variable"`` / ``"Floating"`` and non-string
        inputs are returned exactly as received.

    Raises
    ------
    ValueError
        If the text is neither a number nor a ``[units ]num/denom`` fraction.
    """
    if not isinstance(x, str):
        # Already converted (or missing): nothing to parse, and calling
        # .strip() on a float would raise AttributeError.
        return x

    text = x.strip()
    if text in ('Variable', 'Floating'):
        return x

    try:
        return float(text)
    except ValueError:
        pass

    # Fall back to "units num/denom"; splitting on any whitespace tolerates
    # repeated spaces, and a bare "num/denom" is treated as zero units.
    parts = text.split()
    if len(parts) == 1:
        units, fraction = '0', parts[0]
    elif len(parts) == 2:
        units, fraction = parts
    else:
        raise ValueError(f"Cannot parse percentage {x!r}")
    num, denom = map(float, fraction.split('/'))
    return float(units) + num / denom
def _clean_amounts(table):
    """Return a numeric copy of one BGS amounts table with cleaned dates.

    "Redeemed" and any other non-numeric cell becomes 0, and every index label
    is passed through ``clean_date``. The input table is left untouched.
    """
    cleaned = (
        table
        .replace("Redeemed", "")
        .apply(pd.to_numeric, errors='coerce')
        .fillna(0)
    )
    cleaned.index = (
        pd.Index([clean_date(label) for label in table.index])
        .to_period('M')
        .to_timestamp('M')
    )
    return cleaned


tables = load_bgs_amounts("downloads/BGSAmounts.csv")

# The four amounts tables all get the same cleaning.
conv = _clean_amounts(tables['Conventionals'])
old = _clean_amounts(tables['Calculated indexed nominal Old-style'])
new = _clean_amounts(tables['Calculated indexed nominal New-style'])
new_no_idx = _clean_amounts(tables['Index-linked New-style'])

price_df = load_prices("downloads/BGSPrices.csv")
price_df.index = pd.to_datetime(price_df.index, format="%d %b %Y").to_period('M').to_timestamp('M')
price_df = (
    price_df
    # Status markers become a price of 0.
    .replace(['Amalgamated', 'Redeemed', 'redeemed'], 0)
    # Use an explicit NaN: replace(..., None) has meant different things in
    # different pandas versions, whereas NaN is always a missing value that the
    # forward fill below can carry the previous price into.
    .replace('missing', np.nan)
    .ffill(axis=0)
    .fillna(0)
)

details = load_csv_blocks("downloads/BGSDetails.csv")

conv_details = details['Conventionals']
new_details = details['Index-Linked New-style']
old_details = details['Index-Linked Old-style']

# Convert the coupon column of each details table in place.
for detail_table in (conv_details, new_details, old_details):
    detail_table['%'] = detail_table['%'].apply(clean_percentage)


In [None]:
# Date of the snapshot analysed below; used as the row label into conv and price_df.
last = '2025-04-30'

In [None]:
# Column labels of the conventional gilts whose amount is non-zero on `last`.
is_outstanding = conv.loc[last] != 0
outstanding_row = conv.loc[last, is_outstanding]
outstanding = outstanding_row.reset_index()['index'].to_list()
outstanding[0]

In [None]:
# price_df is looked up with integer labels, conv with the labels as stored in
# `outstanding`.
price_columns = list(map(int, outstanding))
last_price = price_df.loc[last, price_columns]
principal = conv.loc[last, outstanding]

In [None]:
# Give both series the names they will carry as columns after reset_index(),
# and cast principal's labels to int to match the labels used for last_price.
last_price.name = 'price'
principal.name = 'balance_outstanding'
principal.index = principal.index.astype(int)

In [None]:
# One row per outstanding gilt: price and balance joined on the gilt label,
# which is then exposed as the 'Sequence' column.
data_set_april = (
    last_price.reset_index()
    .merge(principal.reset_index(), on='index')
    .rename(columns={'index': 'Sequence'})
)

In [None]:
# Balance-weighted average price of the outstanding gilts.
balance = data_set_april['balance_outstanding']
(balance * data_set_april['price']).sum() / balance.sum()

In [None]:
# Total of 16% of every outstanding balance.
# NOTE(review): the meaning of the 0.16 factor is not shown in this notebook — confirm and name it.
(data_set_april['balance_outstanding']*0.16).sum()


In [None]:
# Preview the first rows of the price / balance table.
data_set_april.head()

In [None]:
# Cast Sequence to int so it compares equal to the integer Sequence values in data_set_april.
conv_details['Sequence'] = conv_details['Sequence'].astype(int)

In [None]:
# Number of rows in conv_details whose Sequence belongs to an outstanding gilt.
# The integer ids are built once as a set (the previous version rebuilt the
# whole list for every row and searched it linearly).
outstanding_ids = {int(label) for label in outstanding}
int(conv_details['Sequence'].isin(outstanding_ids).sum())

In [None]:
# Number of outstanding gilts, for comparison with the count matched in conv_details.
len(outstanding)

In [None]:
# List the columns available in the conventional gilt details table.
conv_details.columns

In [None]:
date_variables = ["Latest redemption date", "Issue date", "First coupon payable on date"]
static_variables = ['%', "Frequency", "Sequence"] + date_variables

# Drop static columns left behind by an earlier run of this cell. Without this a
# re-run merges them a second time, producing suffixed duplicates ('%_x',
# '%_y', ...) so that the date columns below can no longer be found.
stale_columns = [col for col in static_variables if col != "Sequence"]
data_set_april = (
    data_set_april
    .drop(columns=stale_columns, errors='ignore')
    .merge(conv_details[static_variables].set_index("Sequence"), on='Sequence', how='inner')
)

# Bring every date column onto the same form as the amounts / price indexes.
for date_var in date_variables:
    data_set_april[date_var] = data_set_april[date_var].apply(clean_date)
data_set_april.head()

In [None]:
# Balance-weighted average of the '%' (coupon) column.
balance = data_set_april['balance_outstanding']
(balance * data_set_april['%']).sum() / balance.sum()

In [None]:
# Distribution of the balance outstanding across gilts.
data_set_april['balance_outstanding'].plot.hist()

In [None]:
sns.set_theme(style="darkgrid")

# Bubble chart: redemption date against price, marker size scaled by the
# balance outstanding.
bubble_options = dict(
    x="Latest redemption date",
    y="price",
    size="balance_outstanding",
    sizes=(10, 500),
    legend=False,
)
ax = sns.scatterplot(data=data_set_april, **bubble_options)

plt.show()

In [None]:
# Data interval (min, max) of the x axis of the most recent plot held in `ax`.
# NOTE(review): this name shadows the builtin `range` for the rest of the
# session; a later cell reads range[0] / range[1], so rename both together.
range = ax.get_xaxis().get_data_interval()


In [None]:
# Earliest redemption date expressed as whole days since 1970-01-01.
epoch = pd.Timestamp("1970-01-01")
earliest_redemption = data_set_april['Latest redemption date'].min()
(earliest_redemption - epoch).days

In [None]:
# Feature: redemption date as whole days since 1970-01-01, shaped as a
# single-column 2-D array; target: the observed price.
days_since_epoch = (data_set_april['Latest redemption date'] - pd.Timestamp("1970-01-01")).dt.days
X = np.array(days_since_epoch.to_list()).reshape(-1, 1)
y = data_set_april['price']

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn import preprocessing
from sklearn.svm import SVR

# Standardise the feature, then fit a linear support vector regression.
# `gamma` is the kernel coefficient for the 'rbf', 'poly' and 'sigmoid' kernels
# only, so the former gamma=0.01 had no effect with kernel='linear' and has been
# removed to avoid suggesting that it is a tuned setting.
pipeline = make_pipeline(
    preprocessing.StandardScaler(),
    SVR(kernel='linear', epsilon=0.01, C=100),
)

In [None]:
# Fit the scaler + SVR pipeline on the redemption-date feature (X) against price (y).
pipeline.fit(X, y)

In [None]:
# In-sample predictions: the fitted price for each row of X, in the same row order.
y_hat = pipeline.predict(X)

In [None]:
sns.set_theme(style="darkgrid")

# Same bubble chart as before (redemption date against price, marker size by
# balance outstanding) ...
bubble_options = dict(
    x="Latest redemption date",
    y="price",
    size="balance_outstanding",
    sizes=(10, 500),
    legend=False,
)
ax = sns.scatterplot(data=data_set_april, **bubble_options)

# ... with the fitted prices drawn on top of it.
redemption_dates = data_set_april['Latest redemption date']
ax.plot(redemption_dates, y_hat)
plt.show()

In [None]:
# Pair every row's redemption date with the price predicted for that row.
decision_line = (
    data_set_april[['Latest redemption date']]
    .assign(decision_line_price=y_hat)
)

In [None]:
# decision_line was built from data_set_april's own date column, so it shares
# this frame's index and can be attached row for row. Joining on
# 'Latest redemption date' instead is many-to-many whenever two gilts share a
# redemption month, which duplicates rows; assigning by index also keeps the
# cell safe to re-run.
data_set_april = data_set_april.assign(
    decision_line_price=decision_line['decision_line_price']
)

In [None]:
# Gap between fitted and observed price, as a fraction of the observed price.
observed_price = data_set_april['price']
relative_gap = (data_set_april['decision_line_price'] - observed_price) / observed_price
relative_gap.plot(kind='hist', bins=50, title='Difference between decision line and actual price')

In [None]:
# Spread of the fitted prices divided by the width of the x-axis data interval,
# i.e. the size of the fitted slope per x-axis unit.
# `range` here is the interval stored by an earlier cell, not the builtin.
# NOTE(review): presumably the x-axis unit is days (matplotlib date numbers) — confirm.
(y_hat.max()-y_hat.min())/(range[1] -range[0])

In [None]:
sns.set_theme(style="darkgrid")

# Flag gilts whose price sits more than 2 below the fitted line (same units as
# the price column).
condition = (data_set_april['decision_line_price'] - data_set_april['price']) > 2
data_set_april['segmentation'] = condition.map({True: 'orange', False: 'blue'})

# The hue labels are only names: without an explicit palette seaborn assigns
# its default colours in order of appearance, so the 'orange' group could be
# drawn in blue. Map each label to the colour it names.
ax = sns.scatterplot(
    data=data_set_april,
    x="Latest redemption date",
    y="price",
    size="balance_outstanding",
    legend=False,
    sizes=(10, 500),
    hue='segmentation',
    palette={'orange': 'orange', 'blue': 'blue'},
)

# Draw the fitted line from the frame's own column so x and y always have the
# same length, sorted by date so the line is traced left to right.
fitted = data_set_april.sort_values('Latest redemption date')
ax.plot(fitted['Latest redemption date'], fitted['decision_line_price'])
plt.show()